net: skb->rtable accessor
net/ipv4/ipip.c
1 /*
2  *      Linux NET3:     IP/IP protocol decoder.
3  *
4  *      Authors:
5  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
6  *
7  *      Fixes:
8  *              Alan Cox        :       Merged and made usable non modular (it's so tiny it's silly as
9  *                                      a module taking up 2 pages).
10  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11  *                                      to keep ip_forward happy.
12  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
14  *              David Woodhouse :       Perform some basic ICMP handling.
15  *                                      IPIP Routing without decapsulation.
16  *              Carlos Picoto   :       GRE over IP support
17  *              Alexey Kuznetsov:       Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
18  *                                      I do not want to merge them together.
19  *
20  *      This program is free software; you can redistribute it and/or
21  *      modify it under the terms of the GNU General Public License
22  *      as published by the Free Software Foundation; either version
23  *      2 of the License, or (at your option) any later version.
24  *
25  */
26
27 /* tunnel.c: an IP tunnel driver
28
29         The purpose of this driver is to provide an IP tunnel through
30         which you can tunnel network traffic transparently across subnets.
31
32         This was written by looking at Nick Holloway's dummy driver
33         Thanks for the great code!
34
35                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
36
37         Minor tweaks:
38                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39                 dev->hard_header/hard_header_len changed to use no headers.
40                 Comments/bracketing tweaked.
41                 Made the tunnels use dev->name not "tunnel:" when error reporting.
42                 Added tx_dropped stat
43
44                 -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
45
46         Reworked:
47                 Changed to tunnel to the destination gateway in addition to the
48                         tunnel's pointopoint address
49                 Almost completely rewritten
50                 Note:  There is currently no firewall or ICMP handling done.
51
52                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
53
54 */
55
56 /* Things I wish I had known when writing the tunnel driver:
57
58         When the tunnel_xmit() function is called, the skb contains the
59         packet to be sent (plus a great deal of extra info), and dev
60         contains the tunnel device that _we_ are.
61
62         When we are passed a packet, we are expected to fill in the
63         source address with our source IP address.
64
65         What is the proper way to allocate, copy and free a buffer?
66         After you allocate it, it is a "0 length" chunk of memory
67         starting at zero.  If you want to add headers to the buffer
68         later, you'll have to call "skb_reserve(skb, amount)" with
69         the amount of memory you want reserved.  Then, you call
70         "skb_put(skb, amount)" with the amount of space you want in
71         the buffer.  skb_put() returns a pointer to the top (#0) of
72         that buffer.  skb->len is set to the amount of space you have
73         "allocated" with skb_put().  You can then write up to skb->len
74         bytes to that buffer.  If you need more, you can call skb_put()
75         again with the additional amount of space you need.  You can
76         find out how much more space you can allocate by calling
77         "skb_tailroom(skb)".
78         Now, to add header space, call "skb_push(skb, header_len)".
79         This creates space at the beginning of the buffer and returns
80         a pointer to this new space.  If later you need to strip a
81         header from a buffer, call "skb_pull(skb, header_len)".
82         skb_headroom() will return how much space is left at the top
83         of the buffer (before the main data).  Remember, this headroom
84         space must be reserved before the skb_put() function is called.
85         */
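
/*
 * For illustration only: a minimal, untested sketch of the skb geometry
 * described above.  It is not part of this driver and is compiled out;
 * the function name and the hdr_len/data_len parameters are made up,
 * chosen only to show the order of skb_reserve()/skb_put()/skb_push()/
 * skb_pull().
 */
#if 0
static struct sk_buff *skb_layout_example(unsigned int hdr_len,
                                          unsigned int data_len)
{
        struct sk_buff *skb;
        unsigned char *payload, *hdr;

        skb = alloc_skb(hdr_len + data_len, GFP_ATOMIC);
        if (!skb)
                return NULL;

        /* Reserve headroom first; this must happen before skb_put(). */
        skb_reserve(skb, hdr_len);

        /* skb_put() grows the data area at the tail and returns its start;
         * skb_tailroom(skb) tells how much more could still be added. */
        payload = skb_put(skb, data_len);
        memset(payload, 0, data_len);

        /* skb_push() opens up header space in front of the payload ... */
        hdr = skb_push(skb, hdr_len);
        memset(hdr, 0, hdr_len);

        /* ... and skb_pull() strips it again, e.g. when decapsulating. */
        skb_pull(skb, hdr_len);

        return skb;
}
#endif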
86
87 /*
88    This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c
89
90    For comments look at net/ipv4/ip_gre.c --ANK
91  */
92
93
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <asm/uaccess.h>
99 #include <linux/skbuff.h>
100 #include <linux/netdevice.h>
101 #include <linux/in.h>
102 #include <linux/tcp.h>
103 #include <linux/udp.h>
104 #include <linux/if_arp.h>
105 #include <linux/mroute.h>
106 #include <linux/init.h>
107 #include <linux/netfilter_ipv4.h>
108 #include <linux/if_ether.h>
109
110 #include <net/sock.h>
111 #include <net/ip.h>
112 #include <net/icmp.h>
113 #include <net/ipip.h>
114 #include <net/inet_ecn.h>
115 #include <net/xfrm.h>
116 #include <net/net_namespace.h>
117 #include <net/netns/generic.h>
118
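/*
 * The bucket index folds the two low-order nibbles of the raw 32-bit
 * address value into 4 bits, e.g. a value ending in 0x2f hashes to
 * 0x2 ^ 0xf = 0xd, giving a bucket in [0, HASH_SIZE).
 */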
119 #define HASH_SIZE  16
120 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
121
122 static int ipip_net_id;
123 struct ipip_net {
124         struct ip_tunnel *tunnels_r_l[HASH_SIZE];
125         struct ip_tunnel *tunnels_r[HASH_SIZE];
126         struct ip_tunnel *tunnels_l[HASH_SIZE];
127         struct ip_tunnel *tunnels_wc[1];
128         struct ip_tunnel **tunnels[4];
129
130         struct net_device *fb_tunnel_dev;
131 };
132
133 static void ipip_fb_tunnel_init(struct net_device *dev);
134 static void ipip_tunnel_init(struct net_device *dev);
135 static void ipip_tunnel_setup(struct net_device *dev);
136
137 static DEFINE_RWLOCK(ipip_lock);
138
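/*
 * Tunnels live in four hash tables, indexed by how specific their key is:
 * tunnels[3] (tunnels_r_l) is keyed by remote and local address,
 * tunnels[2] (tunnels_r) by remote only, tunnels[1] (tunnels_l) by local
 * only, and tunnels[0] (tunnels_wc) holds the single wildcard/fallback
 * tunnel.  The lookup below tries them in that order, most specific first.
 */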
139 static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
140                 __be32 remote, __be32 local)
141 {
142         unsigned h0 = HASH(remote);
143         unsigned h1 = HASH(local);
144         struct ip_tunnel *t;
145         struct ipip_net *ipn = net_generic(net, ipip_net_id);
146
147         for (t = ipn->tunnels_r_l[h0^h1]; t; t = t->next) {
148                 if (local == t->parms.iph.saddr &&
149                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
150                         return t;
151         }
152         for (t = ipn->tunnels_r[h0]; t; t = t->next) {
153                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
154                         return t;
155         }
156         for (t = ipn->tunnels_l[h1]; t; t = t->next) {
157                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
158                         return t;
159         }
160         if ((t = ipn->tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
161                 return t;
162         return NULL;
163 }
164
165 static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn,
166                 struct ip_tunnel_parm *parms)
167 {
168         __be32 remote = parms->iph.daddr;
169         __be32 local = parms->iph.saddr;
170         unsigned h = 0;
171         int prio = 0;
172
173         if (remote) {
174                 prio |= 2;
175                 h ^= HASH(remote);
176         }
177         if (local) {
178                 prio |= 1;
179                 h ^= HASH(local);
180         }
181         return &ipn->tunnels[prio][h];
182 }
183
184 static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn,
185                 struct ip_tunnel *t)
186 {
187         return __ipip_bucket(ipn, &t->parms);
188 }
189
190 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
191 {
192         struct ip_tunnel **tp;
193
194         for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) {
195                 if (t == *tp) {
196                         write_lock_bh(&ipip_lock);
197                         *tp = t->next;
198                         write_unlock_bh(&ipip_lock);
199                         break;
200                 }
201         }
202 }
203
204 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
205 {
206         struct ip_tunnel **tp = ipip_bucket(ipn, t);
207
208         t->next = *tp;
209         write_lock_bh(&ipip_lock);
210         *tp = t;
211         write_unlock_bh(&ipip_lock);
212 }
213
214 static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
215                 struct ip_tunnel_parm *parms, int create)
216 {
217         __be32 remote = parms->iph.daddr;
218         __be32 local = parms->iph.saddr;
219         struct ip_tunnel *t, **tp, *nt;
220         struct net_device *dev;
221         char name[IFNAMSIZ];
222         struct ipip_net *ipn = net_generic(net, ipip_net_id);
223
224         for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) {
225                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
226                         return t;
227         }
228         if (!create)
229                 return NULL;
230
231         if (parms->name[0])
232                 strlcpy(name, parms->name, IFNAMSIZ);
233         else
234                 sprintf(name, "tunl%%d");
235
236         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
237         if (dev == NULL)
238                 return NULL;
239
240         dev_net_set(dev, net);
241
242         if (strchr(name, '%')) {
243                 if (dev_alloc_name(dev, name) < 0)
244                         goto failed_free;
245         }
246
247         nt = netdev_priv(dev);
248         nt->parms = *parms;
249
250         ipip_tunnel_init(dev);
251
252         if (register_netdevice(dev) < 0)
253                 goto failed_free;
254
255         dev_hold(dev);
256         ipip_tunnel_link(ipn, nt);
257         return nt;
258
259 failed_free:
260         free_netdev(dev);
261         return NULL;
262 }
263
264 static void ipip_tunnel_uninit(struct net_device *dev)
265 {
266         struct net *net = dev_net(dev);
267         struct ipip_net *ipn = net_generic(net, ipip_net_id);
268
269         if (dev == ipn->fb_tunnel_dev) {
270                 write_lock_bh(&ipip_lock);
271                 ipn->tunnels_wc[0] = NULL;
272                 write_unlock_bh(&ipip_lock);
273         } else
274                 ipip_tunnel_unlink(ipn, netdev_priv(dev));
275         dev_put(dev);
276 }
277
278 static int ipip_err(struct sk_buff *skb, u32 info)
279 {
280
281 /* All the routers (except for Linux) return only
282    8 bytes of packet payload. This means that precise relaying of
283    ICMP in the real Internet is absolutely infeasible.
284  */
285         struct iphdr *iph = (struct iphdr *)skb->data;
286         const int type = icmp_hdr(skb)->type;
287         const int code = icmp_hdr(skb)->code;
288         struct ip_tunnel *t;
289         int err;
290
291         switch (type) {
292         default:
293         case ICMP_PARAMETERPROB:
294                 return 0;
295
296         case ICMP_DEST_UNREACH:
297                 switch (code) {
298                 case ICMP_SR_FAILED:
299                 case ICMP_PORT_UNREACH:
300                         /* Impossible event. */
301                         return 0;
302                 case ICMP_FRAG_NEEDED:
303                         /* Soft state for pmtu is maintained by IP core. */
304                         return 0;
305                 default:
306                         /* All others are translated to HOST_UNREACH.
307                            rfc2003 contains "deep thoughts" about NET_UNREACH,
308                            I believe they are just ether pollution. --ANK
309                          */
310                         break;
311                 }
312                 break;
313         case ICMP_TIME_EXCEEDED:
314                 if (code != ICMP_EXC_TTL)
315                         return 0;
316                 break;
317         }
318
319         err = -ENOENT;
320
321         read_lock(&ipip_lock);
322         t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
323         if (t == NULL || t->parms.iph.daddr == 0)
324                 goto out;
325
326         err = 0;
327         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
328                 goto out;
329
330         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
331                 t->err_count++;
332         else
333                 t->err_count = 1;
334         t->err_time = jiffies;
335 out:
336         read_unlock(&ipip_lock);
337         return err;
338 }
339
340 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
341                                         struct sk_buff *skb)
342 {
343         struct iphdr *inner_iph = ip_hdr(skb);
344
345         if (INET_ECN_is_ce(outer_iph->tos))
346                 IP_ECN_set_ce(inner_iph);
347 }
348
349 static int ipip_rcv(struct sk_buff *skb)
350 {
351         struct ip_tunnel *tunnel;
352         const struct iphdr *iph = ip_hdr(skb);
353
354         read_lock(&ipip_lock);
355         if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev),
356                                         iph->saddr, iph->daddr)) != NULL) {
357                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
358                         read_unlock(&ipip_lock);
359                         kfree_skb(skb);
360                         return 0;
361                 }
362
363                 secpath_reset(skb);
364
365                 skb->mac_header = skb->network_header;
366                 skb_reset_network_header(skb);
367                 skb->protocol = htons(ETH_P_IP);
368                 skb->pkt_type = PACKET_HOST;
369
370                 tunnel->dev->stats.rx_packets++;
371                 tunnel->dev->stats.rx_bytes += skb->len;
372                 skb->dev = tunnel->dev;
373                 dst_release(skb->dst);
374                 skb->dst = NULL;
375                 nf_reset(skb);
376                 ipip_ecn_decapsulate(iph, skb);
377                 netif_rx(skb);
378                 read_unlock(&ipip_lock);
379                 return 0;
380         }
381         read_unlock(&ipip_lock);
382
383         return -1;
384 }
385
386 /*
387  *      This function assumes it is being called from dev_queue_xmit()
388  *      and that skb is filled properly by that function.
389  */
390
391 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
392 {
393         struct ip_tunnel *tunnel = netdev_priv(dev);
394         struct net_device_stats *stats = &tunnel->dev->stats;
395         struct iphdr  *tiph = &tunnel->parms.iph;
396         u8     tos = tunnel->parms.iph.tos;
397         __be16 df = tiph->frag_off;
398         struct rtable *rt;                      /* Route to the other host */
399         struct net_device *tdev;                        /* Device to other host */
400         struct iphdr  *old_iph = ip_hdr(skb);
401         struct iphdr  *iph;                     /* Our new IP header */
402         unsigned int max_headroom;              /* The extra header space needed */
403         __be32 dst = tiph->daddr;
404         int    mtu;
405
406         if (tunnel->recursion++) {
407                 stats->collisions++;
408                 goto tx_error;
409         }
410
411         if (skb->protocol != htons(ETH_P_IP))
412                 goto tx_error;
413
414         if (tos&1)
415                 tos = old_iph->tos;
416
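        /*
         * With no fixed remote endpoint configured, the destination is
         * taken from the gateway of the route already attached to the
         * skb, fetched via the skb_rtable() accessor.
         */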
417         if (!dst) {
418                 /* NBMA tunnel */
419                 if ((rt = skb_rtable(skb)) == NULL) {
420                         stats->tx_fifo_errors++;
421                         goto tx_error;
422                 }
423                 if ((dst = rt->rt_gateway) == 0)
424                         goto tx_error_icmp;
425         }
426
427         {
428                 struct flowi fl = { .oif = tunnel->parms.link,
429                                     .nl_u = { .ip4_u =
430                                               { .daddr = dst,
431                                                 .saddr = tiph->saddr,
432                                                 .tos = RT_TOS(tos) } },
433                                     .proto = IPPROTO_IPIP };
434                 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
435                         stats->tx_carrier_errors++;
436                         goto tx_error_icmp;
437                 }
438         }
439         tdev = rt->u.dst.dev;
440
441         if (tdev == dev) {
442                 ip_rt_put(rt);
443                 stats->collisions++;
444                 goto tx_error;
445         }
446
447         if (tiph->frag_off)
448                 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
449         else
450                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
451
452         if (mtu < 68) {
453                 stats->collisions++;
454                 ip_rt_put(rt);
455                 goto tx_error;
456         }
457         if (skb->dst)
458                 skb->dst->ops->update_pmtu(skb->dst, mtu);
459
460         df |= (old_iph->frag_off&htons(IP_DF));
461
462         if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
463                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
464                 ip_rt_put(rt);
465                 goto tx_error;
466         }
467
468         if (tunnel->err_count > 0) {
469                 if (time_before(jiffies,
470                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
471                         tunnel->err_count--;
472                         dst_link_failure(skb);
473                 } else
474                         tunnel->err_count = 0;
475         }
476
477         /*
478          * Okay, now see if we can stuff it in the buffer as-is.
479          */
480         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
481
482         if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
483             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
484                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
485                 if (!new_skb) {
486                         ip_rt_put(rt);
487                         stats->tx_dropped++;
488                         dev_kfree_skb(skb);
489                         tunnel->recursion--;
490                         return 0;
491                 }
492                 if (skb->sk)
493                         skb_set_owner_w(new_skb, skb->sk);
494                 dev_kfree_skb(skb);
495                 skb = new_skb;
496                 old_iph = ip_hdr(skb);
497         }
498
499         skb->transport_header = skb->network_header;
500         skb_push(skb, sizeof(struct iphdr));
501         skb_reset_network_header(skb);
502         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
503         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
504                               IPSKB_REROUTED);
505         dst_release(skb->dst);
506         skb->dst = &rt->u.dst;
507
508         /*
509          *      Push down and install the IPIP header.
510          */
511
512         iph                     =       ip_hdr(skb);
513         iph->version            =       4;
514         iph->ihl                =       sizeof(struct iphdr)>>2;
515         iph->frag_off           =       df;
516         iph->protocol           =       IPPROTO_IPIP;
517         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
518         iph->daddr              =       rt->rt_dst;
519         iph->saddr              =       rt->rt_src;
520
521         if ((iph->ttl = tiph->ttl) == 0)
522                 iph->ttl        =       old_iph->ttl;
523
524         nf_reset(skb);
525
526         IPTUNNEL_XMIT();
527         tunnel->recursion--;
528         return 0;
529
530 tx_error_icmp:
531         dst_link_failure(skb);
532 tx_error:
533         stats->tx_errors++;
534         dev_kfree_skb(skb);
535         tunnel->recursion--;
536         return 0;
537 }
538
539 static void ipip_tunnel_bind_dev(struct net_device *dev)
540 {
541         struct net_device *tdev = NULL;
542         struct ip_tunnel *tunnel;
543         struct iphdr *iph;
544
545         tunnel = netdev_priv(dev);
546         iph = &tunnel->parms.iph;
547
548         if (iph->daddr) {
549                 struct flowi fl = { .oif = tunnel->parms.link,
550                                     .nl_u = { .ip4_u =
551                                               { .daddr = iph->daddr,
552                                                 .saddr = iph->saddr,
553                                                 .tos = RT_TOS(iph->tos) } },
554                                     .proto = IPPROTO_IPIP };
555                 struct rtable *rt;
556                 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
557                         tdev = rt->u.dst.dev;
558                         ip_rt_put(rt);
559                 }
560                 dev->flags |= IFF_POINTOPOINT;
561         }
562
563         if (!tdev && tunnel->parms.link)
564                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
565
566         if (tdev) {
567                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
568                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
569         }
570         dev->iflink = tunnel->parms.link;
571 }
572
573 static int
574 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
575 {
576         int err = 0;
577         struct ip_tunnel_parm p;
578         struct ip_tunnel *t;
579         struct net *net = dev_net(dev);
580         struct ipip_net *ipn = net_generic(net, ipip_net_id);
581
582         switch (cmd) {
583         case SIOCGETTUNNEL:
584                 t = NULL;
585                 if (dev == ipn->fb_tunnel_dev) {
586                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
587                                 err = -EFAULT;
588                                 break;
589                         }
590                         t = ipip_tunnel_locate(net, &p, 0);
591                 }
592                 if (t == NULL)
593                         t = netdev_priv(dev);
594                 memcpy(&p, &t->parms, sizeof(p));
595                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
596                         err = -EFAULT;
597                 break;
598
599         case SIOCADDTUNNEL:
600         case SIOCCHGTUNNEL:
601                 err = -EPERM;
602                 if (!capable(CAP_NET_ADMIN))
603                         goto done;
604
605                 err = -EFAULT;
606                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
607                         goto done;
608
609                 err = -EINVAL;
610                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
611                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
612                         goto done;
613                 if (p.iph.ttl)
614                         p.iph.frag_off |= htons(IP_DF);
615
616                 t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
617
618                 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
619                         if (t != NULL) {
620                                 if (t->dev != dev) {
621                                         err = -EEXIST;
622                                         break;
623                                 }
624                         } else {
625                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
626                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
627                                         err = -EINVAL;
628                                         break;
629                                 }
630                                 t = netdev_priv(dev);
631                                 ipip_tunnel_unlink(ipn, t);
632                                 t->parms.iph.saddr = p.iph.saddr;
633                                 t->parms.iph.daddr = p.iph.daddr;
634                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
635                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
636                                 ipip_tunnel_link(ipn, t);
637                                 netdev_state_change(dev);
638                         }
639                 }
640
641                 if (t) {
642                         err = 0;
643                         if (cmd == SIOCCHGTUNNEL) {
644                                 t->parms.iph.ttl = p.iph.ttl;
645                                 t->parms.iph.tos = p.iph.tos;
646                                 t->parms.iph.frag_off = p.iph.frag_off;
647                                 if (t->parms.link != p.link) {
648                                         t->parms.link = p.link;
649                                         ipip_tunnel_bind_dev(dev);
650                                         netdev_state_change(dev);
651                                 }
652                         }
653                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
654                                 err = -EFAULT;
655                 } else
656                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
657                 break;
658
659         case SIOCDELTUNNEL:
660                 err = -EPERM;
661                 if (!capable(CAP_NET_ADMIN))
662                         goto done;
663
664                 if (dev == ipn->fb_tunnel_dev) {
665                         err = -EFAULT;
666                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
667                                 goto done;
668                         err = -ENOENT;
669                         if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
670                                 goto done;
671                         err = -EPERM;
672                         if (t->dev == ipn->fb_tunnel_dev)
673                                 goto done;
674                         dev = t->dev;
675                 }
676                 unregister_netdevice(dev);
677                 err = 0;
678                 break;
679
680         default:
681                 err = -EINVAL;
682         }
683
684 done:
685         return err;
686 }
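
/*
 * For reference, a minimal userspace sketch (not part of this file) of how
 * a tunnel could be created through the ioctl interface above, in the style
 * of iproute2's iptunnel.  The addresses, TTL and the "ipip0" name are
 * made-up example values; note that a non-zero ttl makes SIOCADDTUNNEL set
 * IP_DF on the outer header (see above).
 *
 *      #include <string.h>
 *      #include <sys/ioctl.h>
 *      #include <sys/socket.h>
 *      #include <arpa/inet.h>
 *      #include <linux/if.h>
 *      #include <linux/ip.h>
 *      #include <linux/if_tunnel.h>
 *
 *      static int add_ipip_tunnel(void)
 *      {
 *              struct ip_tunnel_parm p;
 *              struct ifreq ifr;
 *              int fd;
 *
 *              memset(&p, 0, sizeof(p));
 *              strncpy(p.name, "ipip0", IFNAMSIZ);
 *              p.iph.version  = 4;
 *              p.iph.ihl      = 5;
 *              p.iph.protocol = IPPROTO_IPIP;
 *              p.iph.ttl      = 64;
 *              p.iph.saddr    = inet_addr("192.0.2.1");
 *              p.iph.daddr    = inet_addr("192.0.2.2");
 *
 *              memset(&ifr, 0, sizeof(ifr));
 *              strncpy(ifr.ifr_name, "tunl0", IFNAMSIZ);
 *              ifr.ifr_ifru.ifru_data = (void *)&p;
 *
 *              fd = socket(AF_INET, SOCK_DGRAM, 0);
 *              if (fd < 0)
 *                      return -1;
 *              if (ioctl(fd, SIOCADDTUNNEL, &ifr) < 0)
 *                      return -1;
 *              return 0;
 *      }
 */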
687
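/*
 * 68 is the minimum IPv4 MTU (RFC 791); the upper bound is the largest
 * 16-bit total length rounded down to the 8-byte fragment granularity
 * (0xFFF8), minus the outer IP header this driver adds.
 */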
688 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
689 {
690         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
691                 return -EINVAL;
692         dev->mtu = new_mtu;
693         return 0;
694 }
695
696 static const struct net_device_ops ipip_netdev_ops = {
697         .ndo_uninit     = ipip_tunnel_uninit,
698         .ndo_start_xmit = ipip_tunnel_xmit,
699         .ndo_do_ioctl   = ipip_tunnel_ioctl,
700         .ndo_change_mtu = ipip_tunnel_change_mtu,
701
702 };
703
704 static void ipip_tunnel_setup(struct net_device *dev)
705 {
706         dev->netdev_ops         = &ipip_netdev_ops;
707         dev->destructor         = free_netdev;
708
709         dev->type               = ARPHRD_TUNNEL;
710         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
711         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
712         dev->flags              = IFF_NOARP;
713         dev->iflink             = 0;
714         dev->addr_len           = 4;
715         dev->features           |= NETIF_F_NETNS_LOCAL;
716         dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
717 }
718
719 static void ipip_tunnel_init(struct net_device *dev)
720 {
721         struct ip_tunnel *tunnel = netdev_priv(dev);
722
723         tunnel->dev = dev;
724         strcpy(tunnel->parms.name, dev->name);
725
726         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
727         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
728
729         ipip_tunnel_bind_dev(dev);
730 }
731
732 static void ipip_fb_tunnel_init(struct net_device *dev)
733 {
734         struct ip_tunnel *tunnel = netdev_priv(dev);
735         struct iphdr *iph = &tunnel->parms.iph;
736         struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
737
738         tunnel->dev = dev;
739         strcpy(tunnel->parms.name, dev->name);
740
741         iph->version            = 4;
742         iph->protocol           = IPPROTO_IPIP;
743         iph->ihl                = 5;
744
745         dev_hold(dev);
746         ipn->tunnels_wc[0]      = tunnel;
747 }
748
749 static struct xfrm_tunnel ipip_handler = {
750         .handler        =       ipip_rcv,
751         .err_handler    =       ipip_err,
752         .priority       =       1,
753 };
754
755 static const char banner[] __initconst =
756         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
757
758 static void ipip_destroy_tunnels(struct ipip_net *ipn)
759 {
760         int prio;
761
762         for (prio = 1; prio < 4; prio++) {
763                 int h;
764                 for (h = 0; h < HASH_SIZE; h++) {
765                         struct ip_tunnel *t;
766                         while ((t = ipn->tunnels[prio][h]) != NULL)
767                                 unregister_netdevice(t->dev);
768                 }
769         }
770 }
771
772 static int ipip_init_net(struct net *net)
773 {
774         int err;
775         struct ipip_net *ipn;
776
777         err = -ENOMEM;
778         ipn = kzalloc(sizeof(struct ipip_net), GFP_KERNEL);
779         if (ipn == NULL)
780                 goto err_alloc;
781
782         err = net_assign_generic(net, ipip_net_id, ipn);
783         if (err < 0)
784                 goto err_assign;
785
786         ipn->tunnels[0] = ipn->tunnels_wc;
787         ipn->tunnels[1] = ipn->tunnels_l;
788         ipn->tunnels[2] = ipn->tunnels_r;
789         ipn->tunnels[3] = ipn->tunnels_r_l;
790
791         ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
792                                            "tunl0",
793                                            ipip_tunnel_setup);
794         if (!ipn->fb_tunnel_dev) {
795                 err = -ENOMEM;
796                 goto err_alloc_dev;
797         }
798         dev_net_set(ipn->fb_tunnel_dev, net);
799
800         ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
801
802         if ((err = register_netdev(ipn->fb_tunnel_dev)))
803                 goto err_reg_dev;
804
805         return 0;
806
807 err_reg_dev:
808         free_netdev(ipn->fb_tunnel_dev);
809 err_alloc_dev:
810         /* nothing */
811 err_assign:
812         kfree(ipn);
813 err_alloc:
814         return err;
815 }
816
817 static void ipip_exit_net(struct net *net)
818 {
819         struct ipip_net *ipn;
820
821         ipn = net_generic(net, ipip_net_id);
822         rtnl_lock();
823         ipip_destroy_tunnels(ipn);
824         unregister_netdevice(ipn->fb_tunnel_dev);
825         rtnl_unlock();
826         kfree(ipn);
827 }
828
829 static struct pernet_operations ipip_net_ops = {
830         .init = ipip_init_net,
831         .exit = ipip_exit_net,
832 };
833
834 static int __init ipip_init(void)
835 {
836         int err;
837
838         printk(banner);
839
840         if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
841                 printk(KERN_INFO "ipip init: can't register tunnel\n");
842                 return -EAGAIN;
843         }
844
845         err = register_pernet_gen_device(&ipip_net_id, &ipip_net_ops);
846         if (err)
847                 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
848
849         return err;
850 }
851
852 static void __exit ipip_fini(void)
853 {
854         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
855                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
856
857         unregister_pernet_gen_device(ipip_net_id, &ipip_net_ops);
858 }
859
860 module_init(ipip_init);
861 module_exit(ipip_fini);
862 MODULE_LICENSE("GPL");