[NET]: Convert net/{ipv4,ipv6,sched} to netdev_priv
[safe/jmp/linux-2.6] / net / ipv4 / ipip.c
1 /*
2  *      Linux NET3:     IP/IP protocol decoder. 
3  *
4  *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5  *
6  *      Authors:
7  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
8  *
9  *      Fixes:
10  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
11  *                                      a module taking up 2 pages).
12  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13  *                                      to keep ip_forward happy.
14  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
16  *              David Woodhouse :       Perform some basic ICMP handling.
17  *                                      IPIP Routing without decapsulation.
18  *              Carlos Picoto   :       GRE over IP support
19  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
20  *                                      I do not want to merge them together.
21  *
22  *      This program is free software; you can redistribute it and/or
23  *      modify it under the terms of the GNU General Public License
24  *      as published by the Free Software Foundation; either version
25  *      2 of the License, or (at your option) any later version.
26  *
27  */
28
29 /* tunnel.c: an IP tunnel driver
30
31         The purpose of this driver is to provide an IP tunnel through
32         which you can tunnel network traffic transparently across subnets.
33
34         This was written by looking at Nick Holloway's dummy driver
35         Thanks for the great code!
36
37                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
38                 
39         Minor tweaks:
40                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
41                 dev->hard_header/hard_header_len changed to use no headers.
42                 Comments/bracketing tweaked.
43                 Made the tunnels use dev->name not tunnel: when error reporting.
44                 Added tx_dropped stat
45                 
46                 -Alan Cox       (Alan.Cox@linux.org) 21 March 95
47
48         Reworked:
49                 Changed to tunnel to destination gateway in addition to the
50                         tunnel's pointopoint address
51                 Almost completely rewritten
52                 Note:  There is currently no firewall or ICMP handling done.
53
54                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
55                 
56 */
57
58 /* Things I wish I had known when writing the tunnel driver:
59
60         When the tunnel_xmit() function is called, the skb contains the
61         packet to be sent (plus a great deal of extra info), and dev
62         contains the tunnel device that _we_ are.
63
64         When we are passed a packet, we are expected to fill in the
65         source address with our source IP address.
66
67         What is the proper way to allocate, copy and free a buffer?
68         After you allocate it, it is a "0 length" chunk of memory
69         starting at zero.  If you want to add headers to the buffer
70         later, you'll have to call "skb_reserve(skb, amount)" with
71         the amount of memory you want reserved.  Then, you call
72         "skb_put(skb, amount)" with the amount of space you want in
73         the buffer.  skb_put() returns a pointer to the top (#0) of
74         that buffer.  skb->len is set to the amount of space you have
75         "allocated" with skb_put().  You can then write up to skb->len
76         bytes to that buffer.  If you need more, you can call skb_put()
77         again with the additional amount of space you need.  You can
78         find out how much more space you can allocate by calling 
79         "skb_tailroom(skb)".
80         Now, to add header space, call "skb_push(skb, header_len)".
81         This creates space at the beginning of the buffer and returns
82         a pointer to this new space.  If later you need to strip a
83         header from a buffer, call "skb_pull(skb, header_len)".
84         skb_headroom() will return how much space is left at the top
85         of the buffer (before the main data).  Remember, this headroom
86         space must be reserved before the skb_put() function is called.
87         */
88
/*
   This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c.

   For comments, look at net/ipv4/ip_gre.c --ANK
 */
94
95  
96 #include <linux/config.h>
97 #include <linux/module.h>
98 #include <linux/types.h>
99 #include <linux/sched.h>
100 #include <linux/kernel.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <linux/in.h>
105 #include <linux/tcp.h>
106 #include <linux/udp.h>
107 #include <linux/if_arp.h>
108 #include <linux/mroute.h>
109 #include <linux/init.h>
110 #include <linux/netfilter_ipv4.h>
111 #include <linux/if_ether.h>
112
113 #include <net/sock.h>
114 #include <net/ip.h>
115 #include <net/icmp.h>
116 #include <net/protocol.h>
117 #include <net/ipip.h>
118 #include <net/inet_ecn.h>
119 #include <net/xfrm.h>
120
/* Number of buckets in each address-keyed tunnel hash table (a power of two). */
#define HASH_SIZE  16

/*
 * Fold an IPv4 address into a bucket index in the range [0, HASH_SIZE).
 *
 * The argument is fully parenthesised so that an expression argument
 * (for example HASH(a | b)) hashes the value of the whole expression
 * instead of being torn apart by operator precedence.  The argument is
 * still evaluated twice, so it must be free of side effects.
 */
#define HASH(addr) (((addr) ^ ((addr) >> 4)) & (HASH_SIZE - 1))
123
/* Forward declarations for functions defined further down in this file. */
static int ipip_fb_tunnel_init(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);

/* The fallback tunnel device ("tunl0"); allocated in ipip_init(). */
static struct net_device *ipip_fb_tunnel_dev;

/*
 * Tunnel tables, one per combination of configured endpoint addresses:
 *   tunnels_r_l - both remote and local set (bucket = HASH(remote)^HASH(local))
 *   tunnels_r   - only remote set
 *   tunnels_l   - only local set
 *   tunnels_wc  - neither set (single wildcard slot)
 * tunnels[] indexes them by "prio": bit 1 = remote set, bit 0 = local set
 * (see ipip_bucket()).
 */
static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
static struct ip_tunnel *tunnels_r[HASH_SIZE];
static struct ip_tunnel *tunnels_l[HASH_SIZE];
static struct ip_tunnel *tunnels_wc[1];
static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };

/*
 * Taken for writing (with BHs disabled) around updates of the tables above
 * and for reading around lookups in ipip_err() and ipip_rcv().
 */
static DEFINE_RWLOCK(ipip_lock);
137
138 static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
139 {
140         unsigned h0 = HASH(remote);
141         unsigned h1 = HASH(local);
142         struct ip_tunnel *t;
143
144         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
145                 if (local == t->parms.iph.saddr &&
146                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
147                         return t;
148         }
149         for (t = tunnels_r[h0]; t; t = t->next) {
150                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
151                         return t;
152         }
153         for (t = tunnels_l[h1]; t; t = t->next) {
154                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
155                         return t;
156         }
157         if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
158                 return t;
159         return NULL;
160 }
161
162 static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
163 {
164         u32 remote = t->parms.iph.daddr;
165         u32 local = t->parms.iph.saddr;
166         unsigned h = 0;
167         int prio = 0;
168
169         if (remote) {
170                 prio |= 2;
171                 h ^= HASH(remote);
172         }
173         if (local) {
174                 prio |= 1;
175                 h ^= HASH(local);
176         }
177         return &tunnels[prio][h];
178 }
179
180
181 static void ipip_tunnel_unlink(struct ip_tunnel *t)
182 {
183         struct ip_tunnel **tp;
184
185         for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
186                 if (t == *tp) {
187                         write_lock_bh(&ipip_lock);
188                         *tp = t->next;
189                         write_unlock_bh(&ipip_lock);
190                         break;
191                 }
192         }
193 }
194
195 static void ipip_tunnel_link(struct ip_tunnel *t)
196 {
197         struct ip_tunnel **tp = ipip_bucket(t);
198
199         t->next = *tp;
200         write_lock_bh(&ipip_lock);
201         *tp = t;
202         write_unlock_bh(&ipip_lock);
203 }
204
205 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
206 {
207         u32 remote = parms->iph.daddr;
208         u32 local = parms->iph.saddr;
209         struct ip_tunnel *t, **tp, *nt;
210         struct net_device *dev;
211         unsigned h = 0;
212         int prio = 0;
213         char name[IFNAMSIZ];
214
215         if (remote) {
216                 prio |= 2;
217                 h ^= HASH(remote);
218         }
219         if (local) {
220                 prio |= 1;
221                 h ^= HASH(local);
222         }
223         for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
224                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
225                         return t;
226         }
227         if (!create)
228                 return NULL;
229
230         if (parms->name[0])
231                 strlcpy(name, parms->name, IFNAMSIZ);
232         else {
233                 int i;
234                 for (i=1; i<100; i++) {
235                         sprintf(name, "tunl%d", i);
236                         if (__dev_get_by_name(name) == NULL)
237                                 break;
238                 }
239                 if (i==100)
240                         goto failed;
241         }
242
243         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
244         if (dev == NULL)
245                 return NULL;
246
247         nt = netdev_priv(dev);
248         SET_MODULE_OWNER(dev);
249         dev->init = ipip_tunnel_init;
250         nt->parms = *parms;
251
252         if (register_netdevice(dev) < 0) {
253                 free_netdev(dev);
254                 goto failed;
255         }
256
257         dev_hold(dev);
258         ipip_tunnel_link(nt);
259         return nt;
260
261 failed:
262         return NULL;
263 }
264
265 static void ipip_tunnel_uninit(struct net_device *dev)
266 {
267         if (dev == ipip_fb_tunnel_dev) {
268                 write_lock_bh(&ipip_lock);
269                 tunnels_wc[0] = NULL;
270                 write_unlock_bh(&ipip_lock);
271         } else
272                 ipip_tunnel_unlink(netdev_priv(dev));
273         dev_put(dev);
274 }
275
/*
 * ICMP error handler for IPIP.
 *
 * Only the first preprocessor branch is compiled unless
 * I_WISH_WORLD_WERE_PERFECT is defined.  That branch does not relay the
 * error; for selected ICMP types it finds the affected tunnel and records
 * the error in t->err_count / t->err_time (read back in
 * ipip_tunnel_xmit()).  @info is not used by that branch.
 *
 * NOTE(review): the #else branch is stale.  It refers to `dp`, `len` and
 * `key`, none of which is declared in this function, so it cannot compile
 * as written.
 */
static void ipip_err(struct sk_buff *skb, u32 info)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

/* It is not :-( All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
        /* skb->data is read as an IP header -- presumably the header of the
           offending packet quoted by the ICMP error; confirm with caller. */
        struct iphdr *iph = (struct iphdr*)skb->data;
        int type = skb->h.icmph->type;
        int code = skb->h.icmph->code;
        struct ip_tunnel *t;

        /* Filter: anything that returns here is ignored entirely. */
        switch (type) {
        default:
        case ICMP_PARAMETERPROB:
                return;

        case ICMP_DEST_UNREACH:
                switch (code) {
                case ICMP_SR_FAILED:
                case ICMP_PORT_UNREACH:
                        /* Impossible event. */
                        return;
                case ICMP_FRAG_NEEDED:
                        /* Soft state for pmtu is maintained by IP core. */
                        return;
                default:
                        /* All others are translated to HOST_UNREACH.
                           rfc2003 contains "deep thoughts" about NET_UNREACH,
                           I believe they are just ether pollution. --ANK
                         */
                        break;
                }
                break;
        case ICMP_TIME_EXCEEDED:
                if (code != ICMP_EXC_TTL)
                        return;
                break;
        }

        read_lock(&ipip_lock);
        /* Destination of the quoted header is the tunnel's remote end,
           its source is the tunnel's local end. */
        t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
        /* Tunnels without a fixed remote address are skipped. */
        if (t == NULL || t->parms.iph.daddr == 0)
                goto out;
        /* Skip TTL-exceeded errors when the tunnel has no fixed TTL. */
        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
                goto out;

        /* Count errors arriving within IPTUNNEL_ERR_TIMEO of the previous
           one; otherwise restart the count at 1. */
        if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
                t->err_count++;
        else
                t->err_count = 1;
        t->err_time = jiffies;
out:
        read_unlock(&ipip_lock);
        return;
#else
        /* Not compiled by default; see the NOTE(review) above the function. */
        struct iphdr *iph = (struct iphdr*)dp;
        int hlen = iph->ihl<<2;
        struct iphdr *eiph;
        int type = skb->h.icmph->type;
        int code = skb->h.icmph->code;
        int rel_type = 0;
        int rel_code = 0;
        int rel_info = 0;
        struct sk_buff *skb2;
        struct flowi fl;
        struct rtable *rt;

        if (len < hlen + sizeof(struct iphdr))
                return;
        /* eiph: the header that follows the first (outer) IP header. */
        eiph = (struct iphdr*)(dp + hlen);

        switch (type) {
        default:
                return;
        case ICMP_PARAMETERPROB:
                if (skb->h.icmph->un.gateway < hlen)
                        return;

                /* So... This guy found something strange INSIDE encapsulated
                   packet. Well, he is fool, but what can we do ?
                 */
                rel_type = ICMP_PARAMETERPROB;
                rel_info = skb->h.icmph->un.gateway - hlen;
                break;

        case ICMP_DEST_UNREACH:
                switch (code) {
                case ICMP_SR_FAILED:
                case ICMP_PORT_UNREACH:
                        /* Impossible event. */
                        return;
                case ICMP_FRAG_NEEDED:
                        /* And it is the only really necessary thing :-) */
                        rel_info = ntohs(skb->h.icmph->un.frag.mtu);
                        if (rel_info < hlen+68)
                                return;
                        rel_info -= hlen;
                        /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
                        if (rel_info > ntohs(eiph->tot_len))
                                return;
                        break;
                default:
                        /* All others are translated to HOST_UNREACH.
                           rfc2003 contains "deep thoughts" about NET_UNREACH,
                           I believe, it is just ether pollution. --ANK
                         */
                        rel_type = ICMP_DEST_UNREACH;
                        rel_code = ICMP_HOST_UNREACH;
                        break;
                }
                break;
        case ICMP_TIME_EXCEEDED:
                if (code != ICMP_EXC_TTL)
                        return;
                break;
        }

        /* Prepare fake skb to feed it to icmp_send */
        skb2 = skb_clone(skb, GFP_ATOMIC);
        if (skb2 == NULL)
                return;
        dst_release(skb2->dst);
        skb2->dst = NULL;
        skb_pull(skb2, skb->data - (u8*)eiph);
        skb2->nh.raw = skb2->data;

        /* Try to guess incoming interface */
        memset(&fl, 0, sizeof(fl));
        fl.fl4_daddr = eiph->saddr;
        fl.fl4_tos = RT_TOS(eiph->tos);
        fl.proto = IPPROTO_IPIP;
        /* NOTE(review): `key` is undeclared; `fl` was probably intended. */
        if (ip_route_output_key(&rt, &key)) {
                kfree_skb(skb2);
                return;
        }
        skb2->dev = rt->u.dst.dev;

        /* route "incoming" packet */
        if (rt->rt_flags&RTCF_LOCAL) {
                ip_rt_put(rt);
                rt = NULL;
                fl.fl4_daddr = eiph->daddr;
                fl.fl4_src = eiph->saddr;
                fl.fl4_tos = eiph->tos;
                if (ip_route_output_key(&rt, &fl) ||
                    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
                        ip_rt_put(rt);
                        kfree_skb(skb2);
                        return;
                }
        } else {
                ip_rt_put(rt);
                if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
                    skb2->dst->dev->type != ARPHRD_TUNNEL) {
                        kfree_skb(skb2);
                        return;
                }
        }

        /* change mtu on this route */
        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
                if (rel_info > dst_mtu(skb2->dst)) {
                        kfree_skb(skb2);
                        return;
                }
                skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
                rel_info = htonl(rel_info);
        } else if (type == ICMP_TIME_EXCEEDED) {
                struct ip_tunnel *t = netdev_priv(skb2->dev);
                if (t->parms.iph.ttl) {
                        rel_type = ICMP_DEST_UNREACH;
                        rel_code = ICMP_HOST_UNREACH;
                }
        }

        icmp_send(skb2, rel_type, rel_code, rel_info);
        kfree_skb(skb2);
        return;
#endif
}
458
459 static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
460 {
461         struct iphdr *inner_iph = skb->nh.iph;
462
463         if (INET_ECN_is_ce(outer_iph->tos))
464                 IP_ECN_set_ce(inner_iph);
465 }
466
467 static int ipip_rcv(struct sk_buff *skb)
468 {
469         struct iphdr *iph;
470         struct ip_tunnel *tunnel;
471
472         if (!pskb_may_pull(skb, sizeof(struct iphdr)))
473                 goto out;
474
475         iph = skb->nh.iph;
476
477         read_lock(&ipip_lock);
478         if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
479                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
480                         read_unlock(&ipip_lock);
481                         kfree_skb(skb);
482                         return 0;
483                 }
484
485                 secpath_reset(skb);
486
487                 skb->mac.raw = skb->nh.raw;
488                 skb->nh.raw = skb->data;
489                 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
490                 skb->protocol = htons(ETH_P_IP);
491                 skb->pkt_type = PACKET_HOST;
492
493                 tunnel->stat.rx_packets++;
494                 tunnel->stat.rx_bytes += skb->len;
495                 skb->dev = tunnel->dev;
496                 dst_release(skb->dst);
497                 skb->dst = NULL;
498                 nf_reset(skb);
499                 ipip_ecn_decapsulate(iph, skb);
500                 netif_rx(skb);
501                 read_unlock(&ipip_lock);
502                 return 0;
503         }
504         read_unlock(&ipip_lock);
505
506 out:
507         return -1;
508 }
509
/*
 *      Transmit hook of a tunnel device: wrap @skb in an outer IPv4 header
 *      and send it towards the tunnel's remote end.
 *
 *      This function assumes it is being called from dev_queue_xmit()
 *      and that skb is filled properly by that function.
 *
 *      Every path consumes the skb and returns 0; failures are only
 *      reflected in the tunnel statistics (and, on some paths, an ICMP
 *      error / link-failure notification for the inner packet).
 */

static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct net_device_stats *stats = &tunnel->stat;
        struct iphdr  *tiph = &tunnel->parms.iph;
        u8     tos = tunnel->parms.iph.tos;
        u16    df = tiph->frag_off;
        struct rtable *rt;                      /* Route to the other host */
        struct net_device *tdev;                        /* Device to other host */
        struct iphdr  *old_iph = skb->nh.iph;
        struct iphdr  *iph;                     /* Our new IP header */
        int    max_headroom;                    /* The extra header space needed */
        u32    dst = tiph->daddr;
        int    mtu;

        /* Re-entry guard: a non-zero counter means we are already inside
           this tunnel's xmit; counted as a collision and dropped. */
        if (tunnel->recursion++) {
                tunnel->stat.collisions++;
                goto tx_error;
        }

        /* Only IPv4 payloads are encapsulated. */
        if (skb->protocol != htons(ETH_P_IP))
                goto tx_error;

        /* Low bit of the configured TOS set: take the TOS from the inner header. */
        if (tos&1)
                tos = old_iph->tos;

        if (!dst) {
                /* NBMA tunnel */
                /* No fixed remote end: use the gateway of the inner packet's route. */
                if ((rt = (struct rtable*)skb->dst) == NULL) {
                        tunnel->stat.tx_fifo_errors++;
                        goto tx_error;
                }
                if ((dst = rt->rt_gateway) == 0)
                        goto tx_error_icmp;
        }

        /* Route the outer packet. */
        {
                struct flowi fl = { .oif = tunnel->parms.link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = dst,
                                                .saddr = tiph->saddr,
                                                .tos = RT_TOS(tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(&rt, &fl)) {
                        tunnel->stat.tx_carrier_errors++;
                        goto tx_error_icmp;
                }
        }
        tdev = rt->u.dst.dev;

        /* The outer route points back at this very device. */
        if (tdev == dev) {
                ip_rt_put(rt);
                tunnel->stat.collisions++;
                goto tx_error;
        }

        /* With frag_off configured on the tunnel, the usable MTU is the
           outer route's MTU minus our header; otherwise use the inner
           route's MTU (or the device MTU when there is no inner route). */
        if (tiph->frag_off)
                mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
        else
                mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

        if (mtu < 68) {
                tunnel->stat.collisions++;
                ip_rt_put(rt);
                goto tx_error;
        }
        if (skb->dst)
                skb->dst->ops->update_pmtu(skb->dst, mtu);

        /* Propagate the inner DF bit to the outer header. */
        df |= (old_iph->frag_off&htons(IP_DF));

        /* Inner packet has DF set and does not fit: tell the sender. */
        if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                ip_rt_put(rt);
                goto tx_error;
        }

        /* Errors recorded by ipip_err(): while they are recent, report a
           link failure for one packet per recorded error; once they are
           older than IPTUNNEL_ERR_TIMEO, forget them. */
        if (tunnel->err_count > 0) {
                if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
                        tunnel->err_count--;
                        dst_link_failure(skb);
                } else
                        tunnel->err_count = 0;
        }

        /*
         * Okay, now see if we can stuff it in the buffer as-is.
         */
        max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

        if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
                if (!new_skb) {
                        ip_rt_put(rt);
                        stats->tx_dropped++;
                        dev_kfree_skb(skb);
                        tunnel->recursion--;
                        return 0;
                }
                if (skb->sk)
                        skb_set_owner_w(new_skb, skb->sk);
                dev_kfree_skb(skb);
                skb = new_skb;
                /* The inner header now lives in the new buffer. */
                old_iph = skb->nh.iph;
        }

        /* The inner header becomes the transport header; the outer header
           is pushed in front of it.  The skb takes over the route reference. */
        skb->h.raw = skb->nh.raw;
        skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED);
        dst_release(skb->dst);
        skb->dst = &rt->u.dst;

        /*
         *      Push down and install the IPIP header.
         */

        iph                     =       skb->nh.iph;
        iph->version            =       4;
        iph->ihl                =       sizeof(struct iphdr)>>2;
        iph->frag_off           =       df;
        iph->protocol           =       IPPROTO_IPIP;
        iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
        iph->daddr              =       rt->rt_dst;
        iph->saddr              =       rt->rt_src;

        /* A configured TTL of 0 means "inherit the inner TTL". */
        if ((iph->ttl = tiph->ttl) == 0)
                iph->ttl        =       old_iph->ttl;

        nf_reset(skb);

        /* NOTE(review): IPTUNNEL_XMIT() is a macro defined outside this file
           and takes no arguments; it presumably uses the locals of this
           function (skb, iph, rt, stats) by name to finish the header and
           send the packet -- confirm before renaming any of them. */
        IPTUNNEL_XMIT();
        tunnel->recursion--;
        return 0;

tx_error_icmp:
        dst_link_failure(skb);
tx_error:
        stats->tx_errors++;
        dev_kfree_skb(skb);
        tunnel->recursion--;
        return 0;
}
658
659 static int
660 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
661 {
662         int err = 0;
663         struct ip_tunnel_parm p;
664         struct ip_tunnel *t;
665
666         switch (cmd) {
667         case SIOCGETTUNNEL:
668                 t = NULL;
669                 if (dev == ipip_fb_tunnel_dev) {
670                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
671                                 err = -EFAULT;
672                                 break;
673                         }
674                         t = ipip_tunnel_locate(&p, 0);
675                 }
676                 if (t == NULL)
677                         t = netdev_priv(dev);
678                 memcpy(&p, &t->parms, sizeof(p));
679                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
680                         err = -EFAULT;
681                 break;
682
683         case SIOCADDTUNNEL:
684         case SIOCCHGTUNNEL:
685                 err = -EPERM;
686                 if (!capable(CAP_NET_ADMIN))
687                         goto done;
688
689                 err = -EFAULT;
690                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
691                         goto done;
692
693                 err = -EINVAL;
694                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
695                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
696                         goto done;
697                 if (p.iph.ttl)
698                         p.iph.frag_off |= htons(IP_DF);
699
700                 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
701
702                 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
703                         if (t != NULL) {
704                                 if (t->dev != dev) {
705                                         err = -EEXIST;
706                                         break;
707                                 }
708                         } else {
709                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
710                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
711                                         err = -EINVAL;
712                                         break;
713                                 }
714                                 t = netdev_priv(dev);
715                                 ipip_tunnel_unlink(t);
716                                 t->parms.iph.saddr = p.iph.saddr;
717                                 t->parms.iph.daddr = p.iph.daddr;
718                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
719                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
720                                 ipip_tunnel_link(t);
721                                 netdev_state_change(dev);
722                         }
723                 }
724
725                 if (t) {
726                         err = 0;
727                         if (cmd == SIOCCHGTUNNEL) {
728                                 t->parms.iph.ttl = p.iph.ttl;
729                                 t->parms.iph.tos = p.iph.tos;
730                                 t->parms.iph.frag_off = p.iph.frag_off;
731                         }
732                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
733                                 err = -EFAULT;
734                 } else
735                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
736                 break;
737
738         case SIOCDELTUNNEL:
739                 err = -EPERM;
740                 if (!capable(CAP_NET_ADMIN))
741                         goto done;
742
743                 if (dev == ipip_fb_tunnel_dev) {
744                         err = -EFAULT;
745                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
746                                 goto done;
747                         err = -ENOENT;
748                         if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
749                                 goto done;
750                         err = -EPERM;
751                         if (t->dev == ipip_fb_tunnel_dev)
752                                 goto done;
753                         dev = t->dev;
754                 }
755                 err = unregister_netdevice(dev);
756                 break;
757
758         default:
759                 err = -EINVAL;
760         }
761
762 done:
763         return err;
764 }
765
766 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
767 {
768         return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
769 }
770
771 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
772 {
773         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
774                 return -EINVAL;
775         dev->mtu = new_mtu;
776         return 0;
777 }
778
779 static void ipip_tunnel_setup(struct net_device *dev)
780 {
781         SET_MODULE_OWNER(dev);
782         dev->uninit             = ipip_tunnel_uninit;
783         dev->hard_start_xmit    = ipip_tunnel_xmit;
784         dev->get_stats          = ipip_tunnel_get_stats;
785         dev->do_ioctl           = ipip_tunnel_ioctl;
786         dev->change_mtu         = ipip_tunnel_change_mtu;
787         dev->destructor         = free_netdev;
788
789         dev->type               = ARPHRD_TUNNEL;
790         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
791         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
792         dev->flags              = IFF_NOARP;
793         dev->iflink             = 0;
794         dev->addr_len           = 4;
795 }
796
797 static int ipip_tunnel_init(struct net_device *dev)
798 {
799         struct net_device *tdev = NULL;
800         struct ip_tunnel *tunnel;
801         struct iphdr *iph;
802
803         tunnel = netdev_priv(dev);
804         iph = &tunnel->parms.iph;
805
806         tunnel->dev = dev;
807         strcpy(tunnel->parms.name, dev->name);
808
809         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
810         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
811
812         if (iph->daddr) {
813                 struct flowi fl = { .oif = tunnel->parms.link,
814                                     .nl_u = { .ip4_u =
815                                               { .daddr = iph->daddr,
816                                                 .saddr = iph->saddr,
817                                                 .tos = RT_TOS(iph->tos) } },
818                                     .proto = IPPROTO_IPIP };
819                 struct rtable *rt;
820                 if (!ip_route_output_key(&rt, &fl)) {
821                         tdev = rt->u.dst.dev;
822                         ip_rt_put(rt);
823                 }
824                 dev->flags |= IFF_POINTOPOINT;
825         }
826
827         if (!tdev && tunnel->parms.link)
828                 tdev = __dev_get_by_index(tunnel->parms.link);
829
830         if (tdev) {
831                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
832                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
833         }
834         dev->iflink = tunnel->parms.link;
835
836         return 0;
837 }
838
839 static int __init ipip_fb_tunnel_init(struct net_device *dev)
840 {
841         struct ip_tunnel *tunnel = netdev_priv(dev);
842         struct iphdr *iph = &tunnel->parms.iph;
843
844         tunnel->dev = dev;
845         strcpy(tunnel->parms.name, dev->name);
846
847         iph->version            = 4;
848         iph->protocol           = IPPROTO_IPIP;
849         iph->ihl                = 5;
850
851         dev_hold(dev);
852         tunnels_wc[0]           = tunnel;
853         return 0;
854 }
855
/*
 * Receive-path registration.  ipip_register()/ipip_unregister() hide which
 * of two mechanisms is used to hook ipip_rcv() and ipip_err() in:
 *  - with CONFIG_INET_TUNNEL: as an xfrm4 tunnel handler;
 *  - otherwise: directly as the handler for IPPROTO_IPIP.
 * Both return whatever the underlying (de)registration call returns.
 */
#ifdef CONFIG_INET_TUNNEL
static struct xfrm_tunnel ipip_handler = {
        .handler        =       ipip_rcv,
        .err_handler    =       ipip_err,
};

/* Register ipip_handler with the xfrm4 tunnel layer. */
static inline int ipip_register(void)
{
        return xfrm4_tunnel_register(&ipip_handler);
}

/* Undo ipip_register(). */
static inline int ipip_unregister(void)
{
        return xfrm4_tunnel_deregister(&ipip_handler);
}
#else
static struct net_protocol ipip_protocol = {
        .handler        =       ipip_rcv,
        .err_handler    =       ipip_err,
        .no_policy      =       1,
};

/* Register ipip_protocol as the IPPROTO_IPIP handler. */
static inline int ipip_register(void)
{
        return inet_add_protocol(&ipip_protocol, IPPROTO_IPIP);
}

/* Undo ipip_register(). */
static inline int ipip_unregister(void)
{
        return inet_del_protocol(&ipip_protocol, IPPROTO_IPIP);
}
#endif
888
889 static char banner[] __initdata =
890         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
891
892 static int __init ipip_init(void)
893 {
894         int err;
895
896         printk(banner);
897
898         if (ipip_register() < 0) {
899                 printk(KERN_INFO "ipip init: can't register tunnel\n");
900                 return -EAGAIN;
901         }
902
903         ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
904                                            "tunl0",
905                                            ipip_tunnel_setup);
906         if (!ipip_fb_tunnel_dev) {
907                 err = -ENOMEM;
908                 goto err1;
909         }
910
911         ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
912
913         if ((err = register_netdev(ipip_fb_tunnel_dev)))
914                 goto err2;
915  out:
916         return err;
917  err2:
918         free_netdev(ipip_fb_tunnel_dev);
919  err1:
920         ipip_unregister();
921         goto out;
922 }
923
924 static void __exit ipip_destroy_tunnels(void)
925 {
926         int prio;
927
928         for (prio = 1; prio < 4; prio++) {
929                 int h;
930                 for (h = 0; h < HASH_SIZE; h++) {
931                         struct ip_tunnel *t;
932                         while ((t = tunnels[prio][h]) != NULL)
933                                 unregister_netdevice(t->dev);
934                 }
935         }
936 }
937
938 static void __exit ipip_fini(void)
939 {
940         if (ipip_unregister() < 0)
941                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
942
943         rtnl_lock();
944         ipip_destroy_tunnels();
945         unregister_netdevice(ipip_fb_tunnel_dev);
946         rtnl_unlock();
947 }
948
/* Module entry/exit points and license declaration. */
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");