95db732e542b5b978d8ab82f1da681cd947576c7
[safe/jmp/linux-2.6] / net / ipv4 / ipip.c
1 /*
2  *      Linux NET3:     IP/IP protocol decoder.
3  *
4  *      Authors:
5  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
6  *
7  *      Fixes:
8  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
9  *                                      a module taking up 2 pages).
10  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11  *                                      to keep ip_forward happy.
12  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
14  *              David Woodhouse :       Perform some basic ICMP handling.
15  *                                      IPIP Routing without decapsulation.
16  *              Carlos Picoto   :       GRE over IP support
17  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18  *                                      I do not want to merge them together.
19  *
20  *      This program is free software; you can redistribute it and/or
21  *      modify it under the terms of the GNU General Public License
22  *      as published by the Free Software Foundation; either version
23  *      2 of the License, or (at your option) any later version.
24  *
25  */
26
27 /* tunnel.c: an IP tunnel driver
28
29         The purpose of this driver is to provide an IP tunnel through
30         which you can tunnel network traffic transparently across subnets.
31
32         This was written by looking at Nick Holloway's dummy driver
33         Thanks for the great code!
34
35                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
36
37         Minor tweaks:
38                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39                 dev->hard_header/hard_header_len changed to use no headers.
40                 Comments/bracketing tweaked.
41                 Made the tunnels use dev->name not tunnel: when error reporting.
42                 Added tx_dropped stat
43
44                 -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
45
46         Reworked:
47                 Changed to tunnel to destination gateway in addition to the
48                         tunnel's pointopoint address
49                 Almost completely rewritten
50                 Note:  There is currently no firewall or ICMP handling done.
51
52                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
53
54 */
55
56 /* Things I wish I had known when writing the tunnel driver:
57
58         When the tunnel_xmit() function is called, the skb contains the
59         packet to be sent (plus a great deal of extra info), and dev
60         contains the tunnel device that _we_ are.
61
62         When we are passed a packet, we are expected to fill in the
63         source address with our source IP address.
64
65         What is the proper way to allocate, copy and free a buffer?
66         After you allocate it, it is a "0 length" chunk of memory
67         starting at zero.  If you want to add headers to the buffer
68         later, you'll have to call "skb_reserve(skb, amount)" with
69         the amount of memory you want reserved.  Then, you call
70         "skb_put(skb, amount)" with the amount of space you want in
71         the buffer.  skb_put() returns a pointer to the top (#0) of
72         that buffer.  skb->len is set to the amount of space you have
73         "allocated" with skb_put().  You can then write up to skb->len
74         bytes to that buffer.  If you need more, you can call skb_put()
75         again with the additional amount of space you need.  You can
76         find out how much more space you can allocate by calling
77         "skb_tailroom(skb)".
78         Now, to add header space, call "skb_push(skb, header_len)".
79         This creates space at the beginning of the buffer and returns
80         a pointer to this new space.  If later you need to strip a
81         header from a buffer, call "skb_pull(skb, header_len)".
82         skb_headroom() will return how much space is left at the top
83         of the buffer (before the main data).  Remember, this headroom
84         space must be reserved before the skb_put() function is called.
85         */
86
/*
   This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c.

   For comments, look at net/ipv4/ip_gre.c --ANK
 */
92
93
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <asm/uaccess.h>
99 #include <linux/skbuff.h>
100 #include <linux/netdevice.h>
101 #include <linux/in.h>
102 #include <linux/tcp.h>
103 #include <linux/udp.h>
104 #include <linux/if_arp.h>
105 #include <linux/mroute.h>
106 #include <linux/init.h>
107 #include <linux/netfilter_ipv4.h>
108 #include <linux/if_ether.h>
109
110 #include <net/sock.h>
111 #include <net/ip.h>
112 #include <net/icmp.h>
113 #include <net/ipip.h>
114 #include <net/inet_ecn.h>
115 #include <net/xfrm.h>
116 #include <net/net_namespace.h>
117 #include <net/netns/generic.h>
118
/* Number of buckets in each hashed tunnel table. */
#define HASH_SIZE  16
/*
 * Fold an address into a bucket index in [0, HASH_SIZE): XOR the value
 * with itself shifted right by four bits and keep the low four bits.
 * The argument is parenthesized so that an expression argument, e.g.
 * HASH(a | b), is hashed as a whole instead of being torn apart by
 * operator precedence.
 */
#define HASH(addr) (((__force u32)(addr) ^ ((__force u32)(addr) >> 4)) & 0xF)
121
122 static int ipip_net_id __read_mostly;
123 struct ipip_net {
124         struct ip_tunnel *tunnels_r_l[HASH_SIZE];
125         struct ip_tunnel *tunnels_r[HASH_SIZE];
126         struct ip_tunnel *tunnels_l[HASH_SIZE];
127         struct ip_tunnel *tunnels_wc[1];
128         struct ip_tunnel **tunnels[4];
129
130         struct net_device *fb_tunnel_dev;
131 };
132
/* Defined further down in this file; needed earlier by the tunnel creation code. */
static void ipip_tunnel_setup(struct net_device *dev);
static void ipip_tunnel_init(struct net_device *dev);
135
/*
 * Locking: the hash tables are read under RCU; updates to them are
 * serialized with ipip_lock.
 */
static DEFINE_SPINLOCK(ipip_lock);

/*
 * Walk the tunnel chain that begins at @start using rcu_dereference().
 * The cursor is a variable named 't' that the caller must have declared.
 */
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
143
144 static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
145                 __be32 remote, __be32 local)
146 {
147         unsigned h0 = HASH(remote);
148         unsigned h1 = HASH(local);
149         struct ip_tunnel *t;
150         struct ipip_net *ipn = net_generic(net, ipip_net_id);
151
152         for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
153                 if (local == t->parms.iph.saddr &&
154                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
155                         return t;
156
157         for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
158                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
159                         return t;
160
161         for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
162                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
163                         return t;
164
165         t = rcu_dereference(ipn->tunnels_wc[0]);
166         if (t && (t->dev->flags&IFF_UP))
167                 return t;
168         return NULL;
169 }
170
171 static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn,
172                 struct ip_tunnel_parm *parms)
173 {
174         __be32 remote = parms->iph.daddr;
175         __be32 local = parms->iph.saddr;
176         unsigned h = 0;
177         int prio = 0;
178
179         if (remote) {
180                 prio |= 2;
181                 h ^= HASH(remote);
182         }
183         if (local) {
184                 prio |= 1;
185                 h ^= HASH(local);
186         }
187         return &ipn->tunnels[prio][h];
188 }
189
190 static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn,
191                 struct ip_tunnel *t)
192 {
193         return __ipip_bucket(ipn, &t->parms);
194 }
195
196 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
197 {
198         struct ip_tunnel **tp;
199
200         for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) {
201                 if (t == *tp) {
202                         spin_lock_bh(&ipip_lock);
203                         *tp = t->next;
204                         spin_unlock_bh(&ipip_lock);
205                         break;
206                 }
207         }
208 }
209
210 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
211 {
212         struct ip_tunnel **tp = ipip_bucket(ipn, t);
213
214         spin_lock_bh(&ipip_lock);
215         t->next = *tp;
216         rcu_assign_pointer(*tp, t);
217         spin_unlock_bh(&ipip_lock);
218 }
219
220 static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
221                 struct ip_tunnel_parm *parms, int create)
222 {
223         __be32 remote = parms->iph.daddr;
224         __be32 local = parms->iph.saddr;
225         struct ip_tunnel *t, **tp, *nt;
226         struct net_device *dev;
227         char name[IFNAMSIZ];
228         struct ipip_net *ipn = net_generic(net, ipip_net_id);
229
230         for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) {
231                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
232                         return t;
233         }
234         if (!create)
235                 return NULL;
236
237         if (parms->name[0])
238                 strlcpy(name, parms->name, IFNAMSIZ);
239         else
240                 sprintf(name, "tunl%%d");
241
242         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
243         if (dev == NULL)
244                 return NULL;
245
246         dev_net_set(dev, net);
247
248         if (strchr(name, '%')) {
249                 if (dev_alloc_name(dev, name) < 0)
250                         goto failed_free;
251         }
252
253         nt = netdev_priv(dev);
254         nt->parms = *parms;
255
256         ipip_tunnel_init(dev);
257
258         if (register_netdevice(dev) < 0)
259                 goto failed_free;
260
261         dev_hold(dev);
262         ipip_tunnel_link(ipn, nt);
263         return nt;
264
265 failed_free:
266         free_netdev(dev);
267         return NULL;
268 }
269
270 static void ipip_tunnel_uninit(struct net_device *dev)
271 {
272         struct net *net = dev_net(dev);
273         struct ipip_net *ipn = net_generic(net, ipip_net_id);
274
275         if (dev == ipn->fb_tunnel_dev) {
276                 spin_lock_bh(&ipip_lock);
277                 ipn->tunnels_wc[0] = NULL;
278                 spin_unlock_bh(&ipip_lock);
279         } else
280                 ipip_tunnel_unlink(ipn, netdev_priv(dev));
281         dev_put(dev);
282 }
283
284 static int ipip_err(struct sk_buff *skb, u32 info)
285 {
286
287 /* All the routers (except for Linux) return only
288    8 bytes of packet payload. It means, that precise relaying of
289    ICMP in the real Internet is absolutely infeasible.
290  */
291         struct iphdr *iph = (struct iphdr *)skb->data;
292         const int type = icmp_hdr(skb)->type;
293         const int code = icmp_hdr(skb)->code;
294         struct ip_tunnel *t;
295         int err;
296
297         switch (type) {
298         default:
299         case ICMP_PARAMETERPROB:
300                 return 0;
301
302         case ICMP_DEST_UNREACH:
303                 switch (code) {
304                 case ICMP_SR_FAILED:
305                 case ICMP_PORT_UNREACH:
306                         /* Impossible event. */
307                         return 0;
308                 case ICMP_FRAG_NEEDED:
309                         /* Soft state for pmtu is maintained by IP core. */
310                         return 0;
311                 default:
312                         /* All others are translated to HOST_UNREACH.
313                            rfc2003 contains "deep thoughts" about NET_UNREACH,
314                            I believe they are just ether pollution. --ANK
315                          */
316                         break;
317                 }
318                 break;
319         case ICMP_TIME_EXCEEDED:
320                 if (code != ICMP_EXC_TTL)
321                         return 0;
322                 break;
323         }
324
325         err = -ENOENT;
326
327         rcu_read_lock();
328         t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
329         if (t == NULL || t->parms.iph.daddr == 0)
330                 goto out;
331
332         err = 0;
333         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
334                 goto out;
335
336         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
337                 t->err_count++;
338         else
339                 t->err_count = 1;
340         t->err_time = jiffies;
341 out:
342         rcu_read_unlock();
343         return err;
344 }
345
346 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
347                                         struct sk_buff *skb)
348 {
349         struct iphdr *inner_iph = ip_hdr(skb);
350
351         if (INET_ECN_is_ce(outer_iph->tos))
352                 IP_ECN_set_ce(inner_iph);
353 }
354
355 static int ipip_rcv(struct sk_buff *skb)
356 {
357         struct ip_tunnel *tunnel;
358         const struct iphdr *iph = ip_hdr(skb);
359
360         rcu_read_lock();
361         if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev),
362                                         iph->saddr, iph->daddr)) != NULL) {
363                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
364                         rcu_read_unlock();
365                         kfree_skb(skb);
366                         return 0;
367                 }
368
369                 secpath_reset(skb);
370
371                 skb->mac_header = skb->network_header;
372                 skb_reset_network_header(skb);
373                 skb->protocol = htons(ETH_P_IP);
374                 skb->pkt_type = PACKET_HOST;
375
376                 tunnel->dev->stats.rx_packets++;
377                 tunnel->dev->stats.rx_bytes += skb->len;
378                 skb->dev = tunnel->dev;
379                 skb_dst_drop(skb);
380                 nf_reset(skb);
381                 ipip_ecn_decapsulate(iph, skb);
382                 netif_rx(skb);
383                 rcu_read_unlock();
384                 return 0;
385         }
386         rcu_read_unlock();
387
388         return -1;
389 }
390
/*
 *	This function assumes it is being called from dev_queue_xmit()
 *	and that skb is filled properly by that function.
 *
 *	ndo_start_xmit handler: prepend a new outer IPv4 header with
 *	protocol IPPROTO_IPIP to the packet and hand it to IPTUNNEL_XMIT().
 *
 *	Every path returns NETDEV_TX_OK.  On failure the skb is freed here
 *	and a counter in dev->stats (or txq->tx_dropped) is incremented.
 *
 *	NOTE(review): IPTUNNEL_XMIT() is invoked without arguments, so it
 *	presumably picks up locals of this function (skb, rt, iph, stats,
 *	txq, ...) by name -- check its definition before renaming any of
 *	them.
 */

static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net_device_stats *stats = &dev->stats;
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
	struct iphdr  *tiph = &tunnel->parms.iph;	/* configured header template */
	u8     tos = tunnel->parms.iph.tos;
	__be16 df = tiph->frag_off;
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;			/* Device to other host */
	struct iphdr  *old_iph = ip_hdr(skb);	/* inner (original) header */
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	__be32 dst = tiph->daddr;
	int    mtu;

	/* Only IPv4 payloads are accepted. */
	if (skb->protocol != htons(ETH_P_IP))
		goto tx_error;

	/* Low bit of the configured TOS set: use the inner packet's TOS. */
	if (tos&1)
		tos = old_iph->tos;

	if (!dst) {
		/* NBMA tunnel */
		/* No configured remote: use the gateway of the skb's own route. */
		if ((rt = skb_rtable(skb)) == NULL) {
			stats->tx_fifo_errors++;
			goto tx_error;
		}
		if ((dst = rt->rt_gateway) == 0)
			goto tx_error_icmp;
	}

	/* Route the outer packet; from here on rt holds a route reference. */
	{
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
			stats->tx_carrier_errors++;
			goto tx_error_icmp;
		}
	}
	tdev = rt->u.dst.dev;

	/* The route leads back to this very device: refuse to loop. */
	if (tdev == dev) {
		ip_rt_put(rt);
		stats->collisions++;
		goto tx_error;
	}

	/* DF on the outer header if configured or if the inner header has it. */
	df |= old_iph->frag_off & htons(IP_DF);

	if (df) {
		/* MTU left for the inner packet once the outer header is added. */
		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);

		if (mtu < 68) {
			stats->collisions++;
			ip_rt_put(rt);
			goto tx_error;
		}

		if (skb_dst(skb))
			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

		/* Inner packet has DF set and does not fit: tell the sender. */
		if ((old_iph->frag_off & htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
				  htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}

	/*
	 * err_count/err_time are set by ipip_err().  While the last error is
	 * more recent than IPTUNNEL_ERR_TIMEO, each transmitted packet
	 * consumes one count and triggers dst_link_failure(); otherwise the
	 * count is reset.
	 */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		/* Not enough headroom, or not writable: work on a private copy. */
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			txq->tx_dropped++;
			dev_kfree_skb(skb);
			return NETDEV_TX_OK;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		/* The inner header now lives in the new buffer. */
		old_iph = ip_hdr(skb);
	}

	/* The inner IP header becomes the transport header of the outer packet. */
	skb->transport_header = skb->network_header;
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	/* Replace the skb's dst with the outer route looked up above. */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);

	/*
	 *	Push down and install the IPIP header.
	 */

	iph 			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;

	/* A configured TTL of 0 means: copy the TTL of the inner header. */
	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl	=	old_iph->ttl;

	nf_reset(skb);

	IPTUNNEL_XMIT();
	return NETDEV_TX_OK;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}
538
539 static void ipip_tunnel_bind_dev(struct net_device *dev)
540 {
541         struct net_device *tdev = NULL;
542         struct ip_tunnel *tunnel;
543         struct iphdr *iph;
544
545         tunnel = netdev_priv(dev);
546         iph = &tunnel->parms.iph;
547
548         if (iph->daddr) {
549                 struct flowi fl = { .oif = tunnel->parms.link,
550                                     .nl_u = { .ip4_u =
551                                               { .daddr = iph->daddr,
552                                                 .saddr = iph->saddr,
553                                                 .tos = RT_TOS(iph->tos) } },
554                                     .proto = IPPROTO_IPIP };
555                 struct rtable *rt;
556                 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
557                         tdev = rt->u.dst.dev;
558                         ip_rt_put(rt);
559                 }
560                 dev->flags |= IFF_POINTOPOINT;
561         }
562
563         if (!tdev && tunnel->parms.link)
564                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
565
566         if (tdev) {
567                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
568                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
569         }
570         dev->iflink = tunnel->parms.link;
571 }
572
/*
 * ndo_do_ioctl handler for the tunnel configuration commands
 * SIOCGETTUNNEL, SIOCADDTUNNEL, SIOCCHGTUNNEL and SIOCDELTUNNEL.
 * Parameters travel as a struct ip_tunnel_parm at ifr->ifr_ifru.ifru_data.
 *
 * Returns 0 on success or a negative errno: -EPERM, -EFAULT, -EINVAL,
 * -EEXIST, -ENOBUFS or -ENOENT (see the individual cases).
 *
 * Throughout, err is set to the pessimistic value *before* each check, so
 * both "goto done" and "break" leave with the right code.
 */
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		/*
		 * On the fallback device the caller names the tunnel of
		 * interest through the supplied parameters.
		 */
		if (dev == ipn->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipip_tunnel_locate(net, &p, 0);
		}
		/* Otherwise, or if nothing matched, report this device itself. */
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/*
		 * The header template must be IPv4, protocol IPIP, without
		 * options (ihl 5) and with no frag_off bit set other than DF.
		 */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
			goto done;
		/* A fixed TTL forces DF on. */
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		/* Only SIOCADDTUNNEL may create a new tunnel. */
		t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* The address pair already belongs to another device. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/*
				 * Re-key this device to the new addresses.  The
				 * change must not switch the device between
				 * point-to-point (remote set) and not.
				 */
				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				t = netdev_priv(dev);
				/* Unlink first: the bucket depends on the addresses. */
				ipip_tunnel_unlink(ipn, t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipip_tunnel_link(ipn, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					ipip_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			/* Hand the resulting parameters back to the caller. */
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			/* ADD could not create the tunnel / CHG found none. */
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		/*
		 * On the fallback device the tunnel to delete is named by the
		 * supplied parameters; the fallback device itself cannot be
		 * deleted this way.
		 */
		if (dev == ipn->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			err = -EPERM;
			if (t->dev == ipn->fb_tunnel_dev)
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
687
688 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
689 {
690         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
691                 return -EINVAL;
692         dev->mtu = new_mtu;
693         return 0;
694 }
695
696 static const struct net_device_ops ipip_netdev_ops = {
697         .ndo_uninit     = ipip_tunnel_uninit,
698         .ndo_start_xmit = ipip_tunnel_xmit,
699         .ndo_do_ioctl   = ipip_tunnel_ioctl,
700         .ndo_change_mtu = ipip_tunnel_change_mtu,
701
702 };
703
704 static void ipip_tunnel_setup(struct net_device *dev)
705 {
706         dev->netdev_ops         = &ipip_netdev_ops;
707         dev->destructor         = free_netdev;
708
709         dev->type               = ARPHRD_TUNNEL;
710         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
711         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
712         dev->flags              = IFF_NOARP;
713         dev->iflink             = 0;
714         dev->addr_len           = 4;
715         dev->features           |= NETIF_F_NETNS_LOCAL;
716         dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
717 }
718
719 static void ipip_tunnel_init(struct net_device *dev)
720 {
721         struct ip_tunnel *tunnel = netdev_priv(dev);
722
723         tunnel->dev = dev;
724         strcpy(tunnel->parms.name, dev->name);
725
726         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
727         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
728
729         ipip_tunnel_bind_dev(dev);
730 }
731
732 static void __net_init ipip_fb_tunnel_init(struct net_device *dev)
733 {
734         struct ip_tunnel *tunnel = netdev_priv(dev);
735         struct iphdr *iph = &tunnel->parms.iph;
736         struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
737
738         tunnel->dev = dev;
739         strcpy(tunnel->parms.name, dev->name);
740
741         iph->version            = 4;
742         iph->protocol           = IPPROTO_IPIP;
743         iph->ihl                = 5;
744
745         dev_hold(dev);
746         ipn->tunnels_wc[0]      = tunnel;
747 }
748
749 static struct xfrm_tunnel ipip_handler = {
750         .handler        =       ipip_rcv,
751         .err_handler    =       ipip_err,
752         .priority       =       1,
753 };
754
755 static const char banner[] __initconst =
756         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
757
758 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
759 {
760         int prio;
761
762         for (prio = 1; prio < 4; prio++) {
763                 int h;
764                 for (h = 0; h < HASH_SIZE; h++) {
765                         struct ip_tunnel *t = ipn->tunnels[prio][h];
766
767                         while (t != NULL) {
768                                 unregister_netdevice_queue(t->dev, head);
769                                 t = t->next;
770                         }
771                 }
772         }
773 }
774
775 static int __net_init ipip_init_net(struct net *net)
776 {
777         struct ipip_net *ipn = net_generic(net, ipip_net_id);
778         int err;
779
780         ipn->tunnels[0] = ipn->tunnels_wc;
781         ipn->tunnels[1] = ipn->tunnels_l;
782         ipn->tunnels[2] = ipn->tunnels_r;
783         ipn->tunnels[3] = ipn->tunnels_r_l;
784
785         ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
786                                            "tunl0",
787                                            ipip_tunnel_setup);
788         if (!ipn->fb_tunnel_dev) {
789                 err = -ENOMEM;
790                 goto err_alloc_dev;
791         }
792         dev_net_set(ipn->fb_tunnel_dev, net);
793
794         ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
795
796         if ((err = register_netdev(ipn->fb_tunnel_dev)))
797                 goto err_reg_dev;
798
799         return 0;
800
801 err_reg_dev:
802         free_netdev(ipn->fb_tunnel_dev);
803 err_alloc_dev:
804         /* nothing */
805         return err;
806 }
807
808 static void __net_exit ipip_exit_net(struct net *net)
809 {
810         struct ipip_net *ipn = net_generic(net, ipip_net_id);
811         LIST_HEAD(list);
812
813         rtnl_lock();
814         ipip_destroy_tunnels(ipn, &list);
815         unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
816         unregister_netdevice_many(&list);
817         rtnl_unlock();
818 }
819
820 static struct pernet_operations ipip_net_ops = {
821         .init = ipip_init_net,
822         .exit = ipip_exit_net,
823         .id   = &ipip_net_id,
824         .size = sizeof(struct ipip_net),
825 };
826
827 static int __init ipip_init(void)
828 {
829         int err;
830
831         printk(banner);
832
833         if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
834                 printk(KERN_INFO "ipip init: can't register tunnel\n");
835                 return -EAGAIN;
836         }
837
838         err = register_pernet_device(&ipip_net_ops);
839         if (err)
840                 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
841
842         return err;
843 }
844
845 static void __exit ipip_fini(void)
846 {
847         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
848                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
849
850         unregister_pernet_device(&ipip_net_ops);
851 }
852
853 module_init(ipip_init);
854 module_exit(ipip_fini);
855 MODULE_LICENSE("GPL");