net: skb->dst accessors
[safe/jmp/linux-2.6] / net / netfilter / ipvs / ip_vs_xmit.c
1 /*
2  * ip_vs_xmit.c: various packet transmitters for IPVS
3  *
4  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
5  *              Julian Anastasov <ja@ssi.bg>
6  *
7  *              This program is free software; you can redistribute it and/or
8  *              modify it under the terms of the GNU General Public License
9  *              as published by the Free Software Foundation; either version
10  *              2 of the License, or (at your option) any later version.
11  *
12  * Changes:
13  *
14  */
15
16 #include <linux/kernel.h>
17 #include <linux/tcp.h>                  /* for tcphdr */
18 #include <net/ip.h>
19 #include <net/tcp.h>                    /* for csum_tcpudp_magic */
20 #include <net/udp.h>
21 #include <net/icmp.h>                   /* for icmp_send */
22 #include <net/route.h>                  /* for ip_route_output */
23 #include <net/ipv6.h>
24 #include <net/ip6_route.h>
25 #include <linux/icmpv6.h>
26 #include <linux/netfilter.h>
27 #include <linux/netfilter_ipv4.h>
28
29 #include <net/ip_vs.h>
30
31
32 /*
33  *      Destination cache to speed up outgoing route lookup
34  */
35 static inline void
36 __ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst)
37 {
38         struct dst_entry *old_dst;
39
40         old_dst = dest->dst_cache;
41         dest->dst_cache = dst;
42         dest->dst_rtos = rtos;
43         dst_release(old_dst);
44 }
45
46 static inline struct dst_entry *
47 __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
48 {
49         struct dst_entry *dst = dest->dst_cache;
50
51         if (!dst)
52                 return NULL;
53         if ((dst->obsolete
54              || (dest->af == AF_INET && rtos != dest->dst_rtos)) &&
55             dst->ops->check(dst, cookie) == NULL) {
56                 dest->dst_cache = NULL;
57                 dst_release(dst);
58                 return NULL;
59         }
60         dst_hold(dst);
61         return dst;
62 }
63
64 static struct rtable *
65 __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
66 {
67         struct rtable *rt;                      /* Route to the other host */
68         struct ip_vs_dest *dest = cp->dest;
69
70         if (dest) {
71                 spin_lock(&dest->dst_lock);
72                 if (!(rt = (struct rtable *)
73                       __ip_vs_dst_check(dest, rtos, 0))) {
74                         struct flowi fl = {
75                                 .oif = 0,
76                                 .nl_u = {
77                                         .ip4_u = {
78                                                 .daddr = dest->addr.ip,
79                                                 .saddr = 0,
80                                                 .tos = rtos, } },
81                         };
82
83                         if (ip_route_output_key(&init_net, &rt, &fl)) {
84                                 spin_unlock(&dest->dst_lock);
85                                 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
86                                              &dest->addr.ip);
87                                 return NULL;
88                         }
89                         __ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
90                         IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
91                                   &dest->addr.ip,
92                                   atomic_read(&rt->u.dst.__refcnt), rtos);
93                 }
94                 spin_unlock(&dest->dst_lock);
95         } else {
96                 struct flowi fl = {
97                         .oif = 0,
98                         .nl_u = {
99                                 .ip4_u = {
100                                         .daddr = cp->daddr.ip,
101                                         .saddr = 0,
102                                         .tos = rtos, } },
103                 };
104
105                 if (ip_route_output_key(&init_net, &rt, &fl)) {
106                         IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
107                                      &cp->daddr.ip);
108                         return NULL;
109                 }
110         }
111
112         return rt;
113 }
114
115 #ifdef CONFIG_IP_VS_IPV6
116 static struct rt6_info *
117 __ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
118 {
119         struct rt6_info *rt;                    /* Route to the other host */
120         struct ip_vs_dest *dest = cp->dest;
121
122         if (dest) {
123                 spin_lock(&dest->dst_lock);
124                 rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0);
125                 if (!rt) {
126                         struct flowi fl = {
127                                 .oif = 0,
128                                 .nl_u = {
129                                         .ip6_u = {
130                                                 .daddr = dest->addr.in6,
131                                                 .saddr = {
132                                                         .s6_addr32 =
133                                                                 { 0, 0, 0, 0 },
134                                                 },
135                                         },
136                                 },
137                         };
138
139                         rt = (struct rt6_info *)ip6_route_output(&init_net,
140                                                                  NULL, &fl);
141                         if (!rt) {
142                                 spin_unlock(&dest->dst_lock);
143                                 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
144                                              &dest->addr.in6);
145                                 return NULL;
146                         }
147                         __ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst));
148                         IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n",
149                                   &dest->addr.in6,
150                                   atomic_read(&rt->u.dst.__refcnt));
151                 }
152                 spin_unlock(&dest->dst_lock);
153         } else {
154                 struct flowi fl = {
155                         .oif = 0,
156                         .nl_u = {
157                                 .ip6_u = {
158                                         .daddr = cp->daddr.in6,
159                                         .saddr = {
160                                                 .s6_addr32 = { 0, 0, 0, 0 },
161                                         },
162                                 },
163                         },
164                 };
165
166                 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
167                 if (!rt) {
168                         IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
169                                      &cp->daddr.in6);
170                         return NULL;
171                 }
172         }
173
174         return rt;
175 }
176 #endif
177
178
179 /*
180  *      Release dest->dst_cache before a dest is removed
181  */
182 void
183 ip_vs_dst_reset(struct ip_vs_dest *dest)
184 {
185         struct dst_entry *old_dst;
186
187         old_dst = dest->dst_cache;
188         dest->dst_cache = NULL;
189         dst_release(old_dst);
190 }
191
/*
 *      Send out a prepared packet: flag the skb with ipvs_property, call
 *      skb_forward_csum() on it and pass it to the NF_INET_LOCAL_OUT hook
 *      of protocol family pf, with the device of route rt as output
 *      device and dst_output() as the continuation.
 *
 *      skb is expanded more than once, so pass a plain variable.
 */
#define IP_VS_XMIT(pf, skb, rt)                                 \
do {                                                            \
        (skb)->ipvs_property = 1;                               \
        skb_forward_csum(skb);                                  \
        NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,             \
                (rt)->u.dst.dev, dst_output);                   \
} while (0)
199
200
201 /*
202  *      NULL transmitter (do nothing except return NF_ACCEPT)
203  */
204 int
205 ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
206                 struct ip_vs_protocol *pp)
207 {
208         /* we do not touch skb and do not need pskb ptr */
209         return NF_ACCEPT;
210 }
211
212
213 /*
214  *      Bypass transmitter
215  *      Let packets bypass the destination when the destination is not
216  *      available, it may be only used in transparent cache cluster.
217  */
218 int
219 ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
220                   struct ip_vs_protocol *pp)
221 {
222         struct rtable *rt;                      /* Route to the other host */
223         struct iphdr  *iph = ip_hdr(skb);
224         u8     tos = iph->tos;
225         int    mtu;
226         struct flowi fl = {
227                 .oif = 0,
228                 .nl_u = {
229                         .ip4_u = {
230                                 .daddr = iph->daddr,
231                                 .saddr = 0,
232                                 .tos = RT_TOS(tos), } },
233         };
234
235         EnterFunction(10);
236
237         if (ip_route_output_key(&init_net, &rt, &fl)) {
238                 IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, dest: %pI4\n",
239                              &iph->daddr);
240                 goto tx_error_icmp;
241         }
242
243         /* MTU checking */
244         mtu = dst_mtu(&rt->u.dst);
245         if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
246                 ip_rt_put(rt);
247                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
248                 IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n");
249                 goto tx_error;
250         }
251
252         /*
253          * Call ip_send_check because we are not sure it is called
254          * after ip_defrag. Is copy-on-write needed?
255          */
256         if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
257                 ip_rt_put(rt);
258                 return NF_STOLEN;
259         }
260         ip_send_check(ip_hdr(skb));
261
262         /* drop old route */
263         skb_dst_drop(skb);
264         skb_dst_set(skb, &rt->u.dst);
265
266         /* Another hack: avoid icmp_send in ip_fragment */
267         skb->local_df = 1;
268
269         IP_VS_XMIT(PF_INET, skb, rt);
270
271         LeaveFunction(10);
272         return NF_STOLEN;
273
274  tx_error_icmp:
275         dst_link_failure(skb);
276  tx_error:
277         kfree_skb(skb);
278         LeaveFunction(10);
279         return NF_STOLEN;
280 }
281
282 #ifdef CONFIG_IP_VS_IPV6
283 int
284 ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
285                      struct ip_vs_protocol *pp)
286 {
287         struct rt6_info *rt;                    /* Route to the other host */
288         struct ipv6hdr  *iph = ipv6_hdr(skb);
289         int    mtu;
290         struct flowi fl = {
291                 .oif = 0,
292                 .nl_u = {
293                         .ip6_u = {
294                                 .daddr = iph->daddr,
295                                 .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
296         };
297
298         EnterFunction(10);
299
300         rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
301         if (!rt) {
302                 IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, dest: %pI6\n",
303                              &iph->daddr);
304                 goto tx_error_icmp;
305         }
306
307         /* MTU checking */
308         mtu = dst_mtu(&rt->u.dst);
309         if (skb->len > mtu) {
310                 dst_release(&rt->u.dst);
311                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
312                 IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): frag needed\n");
313                 goto tx_error;
314         }
315
316         /*
317          * Call ip_send_check because we are not sure it is called
318          * after ip_defrag. Is copy-on-write needed?
319          */
320         skb = skb_share_check(skb, GFP_ATOMIC);
321         if (unlikely(skb == NULL)) {
322                 dst_release(&rt->u.dst);
323                 return NF_STOLEN;
324         }
325
326         /* drop old route */
327         skb_dst_drop(skb);
328         skb_dst_set(skb, &rt->u.dst);
329
330         /* Another hack: avoid icmp_send in ip_fragment */
331         skb->local_df = 1;
332
333         IP_VS_XMIT(PF_INET6, skb, rt);
334
335         LeaveFunction(10);
336         return NF_STOLEN;
337
338  tx_error_icmp:
339         dst_link_failure(skb);
340  tx_error:
341         kfree_skb(skb);
342         LeaveFunction(10);
343         return NF_STOLEN;
344 }
345 #endif
346
347 /*
348  *      NAT transmitter (only for outside-to-inside nat forwarding)
349  *      Not used for related ICMP
350  */
351 int
352 ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
353                struct ip_vs_protocol *pp)
354 {
355         struct rtable *rt;              /* Route to the other host */
356         int mtu;
357         struct iphdr *iph = ip_hdr(skb);
358
359         EnterFunction(10);
360
361         /* check if it is a connection of no-client-port */
362         if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
363                 __be16 _pt, *p;
364                 p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
365                 if (p == NULL)
366                         goto tx_error;
367                 ip_vs_conn_fill_cport(cp, *p);
368                 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
369         }
370
371         if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
372                 goto tx_error_icmp;
373
374         /* MTU checking */
375         mtu = dst_mtu(&rt->u.dst);
376         if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
377                 ip_rt_put(rt);
378                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
379                 IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
380                 goto tx_error;
381         }
382
383         /* copy-on-write the packet before mangling it */
384         if (!skb_make_writable(skb, sizeof(struct iphdr)))
385                 goto tx_error_put;
386
387         if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
388                 goto tx_error_put;
389
390         /* drop old route */
391         skb_dst_drop(skb);
392         skb_dst_set(skb, &rt->u.dst);
393
394         /* mangle the packet */
395         if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
396                 goto tx_error;
397         ip_hdr(skb)->daddr = cp->daddr.ip;
398         ip_send_check(ip_hdr(skb));
399
400         IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
401
402         /* FIXME: when application helper enlarges the packet and the length
403            is larger than the MTU of outgoing device, there will be still
404            MTU problem. */
405
406         /* Another hack: avoid icmp_send in ip_fragment */
407         skb->local_df = 1;
408
409         IP_VS_XMIT(PF_INET, skb, rt);
410
411         LeaveFunction(10);
412         return NF_STOLEN;
413
414   tx_error_icmp:
415         dst_link_failure(skb);
416   tx_error:
417         LeaveFunction(10);
418         kfree_skb(skb);
419         return NF_STOLEN;
420   tx_error_put:
421         ip_rt_put(rt);
422         goto tx_error;
423 }
424
425 #ifdef CONFIG_IP_VS_IPV6
426 int
427 ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
428                   struct ip_vs_protocol *pp)
429 {
430         struct rt6_info *rt;            /* Route to the other host */
431         int mtu;
432
433         EnterFunction(10);
434
435         /* check if it is a connection of no-client-port */
436         if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
437                 __be16 _pt, *p;
438                 p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
439                                        sizeof(_pt), &_pt);
440                 if (p == NULL)
441                         goto tx_error;
442                 ip_vs_conn_fill_cport(cp, *p);
443                 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
444         }
445
446         rt = __ip_vs_get_out_rt_v6(cp);
447         if (!rt)
448                 goto tx_error_icmp;
449
450         /* MTU checking */
451         mtu = dst_mtu(&rt->u.dst);
452         if (skb->len > mtu) {
453                 dst_release(&rt->u.dst);
454                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
455                 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
456                                  "ip_vs_nat_xmit_v6(): frag needed for");
457                 goto tx_error;
458         }
459
460         /* copy-on-write the packet before mangling it */
461         if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
462                 goto tx_error_put;
463
464         if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
465                 goto tx_error_put;
466
467         /* drop old route */
468         skb_dst_drop(skb);
469         skb_dst_set(skb, &rt->u.dst);
470
471         /* mangle the packet */
472         if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
473                 goto tx_error;
474         ipv6_hdr(skb)->daddr = cp->daddr.in6;
475
476         IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
477
478         /* FIXME: when application helper enlarges the packet and the length
479            is larger than the MTU of outgoing device, there will be still
480            MTU problem. */
481
482         /* Another hack: avoid icmp_send in ip_fragment */
483         skb->local_df = 1;
484
485         IP_VS_XMIT(PF_INET6, skb, rt);
486
487         LeaveFunction(10);
488         return NF_STOLEN;
489
490 tx_error_icmp:
491         dst_link_failure(skb);
492 tx_error:
493         LeaveFunction(10);
494         kfree_skb(skb);
495         return NF_STOLEN;
496 tx_error_put:
497         dst_release(&rt->u.dst);
498         goto tx_error;
499 }
500 #endif
501
502
503 /*
504  *   IP Tunneling transmitter
505  *
506  *   This function encapsulates the packet in a new IP packet, its
507  *   destination will be set to cp->daddr. Most code of this function
508  *   is taken from ipip.c.
509  *
510  *   It is used in VS/TUN cluster. The load balancer selects a real
511  *   server from a cluster based on a scheduling algorithm,
512  *   encapsulates the request packet and forwards it to the selected
513  *   server. For example, all real servers are configured with
514  *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
515  *   the encapsulated packet, it will decapsulate the packet, processe
516  *   the request and return the response packets directly to the client
517  *   without passing the load balancer. This can greatly increase the
518  *   scalability of virtual server.
519  *
520  *   Used for ANY protocol
521  */
522 int
523 ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
524                   struct ip_vs_protocol *pp)
525 {
526         struct rtable *rt;                      /* Route to the other host */
527         struct net_device *tdev;                /* Device to other host */
528         struct iphdr  *old_iph = ip_hdr(skb);
529         u8     tos = old_iph->tos;
530         __be16 df = old_iph->frag_off;
531         sk_buff_data_t old_transport_header = skb->transport_header;
532         struct iphdr  *iph;                     /* Our new IP header */
533         unsigned int max_headroom;              /* The extra header space needed */
534         int    mtu;
535
536         EnterFunction(10);
537
538         if (skb->protocol != htons(ETH_P_IP)) {
539                 IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
540                              "ETH_P_IP: %d, skb protocol: %d\n",
541                              htons(ETH_P_IP), skb->protocol);
542                 goto tx_error;
543         }
544
545         if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
546                 goto tx_error_icmp;
547
548         tdev = rt->u.dst.dev;
549
550         mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
551         if (mtu < 68) {
552                 ip_rt_put(rt);
553                 IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n");
554                 goto tx_error;
555         }
556         if (skb_dst(skb))
557                 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
558
559         df |= (old_iph->frag_off & htons(IP_DF));
560
561         if ((old_iph->frag_off & htons(IP_DF))
562             && mtu < ntohs(old_iph->tot_len)) {
563                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
564                 ip_rt_put(rt);
565                 IP_VS_DBG_RL("ip_vs_tunnel_xmit(): frag needed\n");
566                 goto tx_error;
567         }
568
569         /*
570          * Okay, now see if we can stuff it in the buffer as-is.
571          */
572         max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
573
574         if (skb_headroom(skb) < max_headroom
575             || skb_cloned(skb) || skb_shared(skb)) {
576                 struct sk_buff *new_skb =
577                         skb_realloc_headroom(skb, max_headroom);
578                 if (!new_skb) {
579                         ip_rt_put(rt);
580                         kfree_skb(skb);
581                         IP_VS_ERR_RL("ip_vs_tunnel_xmit(): no memory\n");
582                         return NF_STOLEN;
583                 }
584                 kfree_skb(skb);
585                 skb = new_skb;
586                 old_iph = ip_hdr(skb);
587         }
588
589         skb->transport_header = old_transport_header;
590
591         /* fix old IP header checksum */
592         ip_send_check(old_iph);
593
594         skb_push(skb, sizeof(struct iphdr));
595         skb_reset_network_header(skb);
596         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
597
598         /* drop old route */
599         skb_dst_drop(skb);
600         skb_dst_set(skb, &rt->u.dst);
601
602         /*
603          *      Push down and install the IPIP header.
604          */
605         iph                     =       ip_hdr(skb);
606         iph->version            =       4;
607         iph->ihl                =       sizeof(struct iphdr)>>2;
608         iph->frag_off           =       df;
609         iph->protocol           =       IPPROTO_IPIP;
610         iph->tos                =       tos;
611         iph->daddr              =       rt->rt_dst;
612         iph->saddr              =       rt->rt_src;
613         iph->ttl                =       old_iph->ttl;
614         ip_select_ident(iph, &rt->u.dst, NULL);
615
616         /* Another hack: avoid icmp_send in ip_fragment */
617         skb->local_df = 1;
618
619         ip_local_out(skb);
620
621         LeaveFunction(10);
622
623         return NF_STOLEN;
624
625   tx_error_icmp:
626         dst_link_failure(skb);
627   tx_error:
628         kfree_skb(skb);
629         LeaveFunction(10);
630         return NF_STOLEN;
631 }
632
633 #ifdef CONFIG_IP_VS_IPV6
634 int
635 ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
636                      struct ip_vs_protocol *pp)
637 {
638         struct rt6_info *rt;            /* Route to the other host */
639         struct net_device *tdev;        /* Device to other host */
640         struct ipv6hdr  *old_iph = ipv6_hdr(skb);
641         sk_buff_data_t old_transport_header = skb->transport_header;
642         struct ipv6hdr  *iph;           /* Our new IP header */
643         unsigned int max_headroom;      /* The extra header space needed */
644         int    mtu;
645
646         EnterFunction(10);
647
648         if (skb->protocol != htons(ETH_P_IPV6)) {
649                 IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): protocol error, "
650                              "ETH_P_IPV6: %d, skb protocol: %d\n",
651                              htons(ETH_P_IPV6), skb->protocol);
652                 goto tx_error;
653         }
654
655         rt = __ip_vs_get_out_rt_v6(cp);
656         if (!rt)
657                 goto tx_error_icmp;
658
659         tdev = rt->u.dst.dev;
660
661         mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr);
662         /* TODO IPv6: do we need this check in IPv6? */
663         if (mtu < 1280) {
664                 dst_release(&rt->u.dst);
665                 IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): mtu less than 1280\n");
666                 goto tx_error;
667         }
668         if (skb_dst(skb))
669                 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
670
671         if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
672                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
673                 dst_release(&rt->u.dst);
674                 IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): frag needed\n");
675                 goto tx_error;
676         }
677
678         /*
679          * Okay, now see if we can stuff it in the buffer as-is.
680          */
681         max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
682
683         if (skb_headroom(skb) < max_headroom
684             || skb_cloned(skb) || skb_shared(skb)) {
685                 struct sk_buff *new_skb =
686                         skb_realloc_headroom(skb, max_headroom);
687                 if (!new_skb) {
688                         dst_release(&rt->u.dst);
689                         kfree_skb(skb);
690                         IP_VS_ERR_RL("ip_vs_tunnel_xmit_v6(): no memory\n");
691                         return NF_STOLEN;
692                 }
693                 kfree_skb(skb);
694                 skb = new_skb;
695                 old_iph = ipv6_hdr(skb);
696         }
697
698         skb->transport_header = old_transport_header;
699
700         skb_push(skb, sizeof(struct ipv6hdr));
701         skb_reset_network_header(skb);
702         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
703
704         /* drop old route */
705         skb_dst_drop(skb);
706         skb_dst_set(skb, &rt->u.dst);
707
708         /*
709          *      Push down and install the IPIP header.
710          */
711         iph                     =       ipv6_hdr(skb);
712         iph->version            =       6;
713         iph->nexthdr            =       IPPROTO_IPV6;
714         iph->payload_len        =       old_iph->payload_len;
715         be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
716         iph->priority           =       old_iph->priority;
717         memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
718         iph->daddr              =       rt->rt6i_dst.addr;
719         iph->saddr              =       cp->vaddr.in6; /* rt->rt6i_src.addr; */
720         iph->hop_limit          =       old_iph->hop_limit;
721
722         /* Another hack: avoid icmp_send in ip_fragment */
723         skb->local_df = 1;
724
725         ip6_local_out(skb);
726
727         LeaveFunction(10);
728
729         return NF_STOLEN;
730
731 tx_error_icmp:
732         dst_link_failure(skb);
733 tx_error:
734         kfree_skb(skb);
735         LeaveFunction(10);
736         return NF_STOLEN;
737 }
738 #endif
739
740
741 /*
742  *      Direct Routing transmitter
743  *      Used for ANY protocol
744  */
745 int
746 ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
747               struct ip_vs_protocol *pp)
748 {
749         struct rtable *rt;                      /* Route to the other host */
750         struct iphdr  *iph = ip_hdr(skb);
751         int    mtu;
752
753         EnterFunction(10);
754
755         if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
756                 goto tx_error_icmp;
757
758         /* MTU checking */
759         mtu = dst_mtu(&rt->u.dst);
760         if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
761                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
762                 ip_rt_put(rt);
763                 IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
764                 goto tx_error;
765         }
766
767         /*
768          * Call ip_send_check because we are not sure it is called
769          * after ip_defrag. Is copy-on-write needed?
770          */
771         if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
772                 ip_rt_put(rt);
773                 return NF_STOLEN;
774         }
775         ip_send_check(ip_hdr(skb));
776
777         /* drop old route */
778         skb_dst_drop(skb);
779         skb_dst_set(skb, &rt->u.dst);
780
781         /* Another hack: avoid icmp_send in ip_fragment */
782         skb->local_df = 1;
783
784         IP_VS_XMIT(PF_INET, skb, rt);
785
786         LeaveFunction(10);
787         return NF_STOLEN;
788
789   tx_error_icmp:
790         dst_link_failure(skb);
791   tx_error:
792         kfree_skb(skb);
793         LeaveFunction(10);
794         return NF_STOLEN;
795 }
796
797 #ifdef CONFIG_IP_VS_IPV6
798 int
799 ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
800                  struct ip_vs_protocol *pp)
801 {
802         struct rt6_info *rt;                    /* Route to the other host */
803         int    mtu;
804
805         EnterFunction(10);
806
807         rt = __ip_vs_get_out_rt_v6(cp);
808         if (!rt)
809                 goto tx_error_icmp;
810
811         /* MTU checking */
812         mtu = dst_mtu(&rt->u.dst);
813         if (skb->len > mtu) {
814                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
815                 dst_release(&rt->u.dst);
816                 IP_VS_DBG_RL("ip_vs_dr_xmit_v6(): frag needed\n");
817                 goto tx_error;
818         }
819
820         /*
821          * Call ip_send_check because we are not sure it is called
822          * after ip_defrag. Is copy-on-write needed?
823          */
824         skb = skb_share_check(skb, GFP_ATOMIC);
825         if (unlikely(skb == NULL)) {
826                 dst_release(&rt->u.dst);
827                 return NF_STOLEN;
828         }
829
830         /* drop old route */
831         skb_dst_drop(skb);
832         skb_dst_set(skb, &rt->u.dst);
833
834         /* Another hack: avoid icmp_send in ip_fragment */
835         skb->local_df = 1;
836
837         IP_VS_XMIT(PF_INET6, skb, rt);
838
839         LeaveFunction(10);
840         return NF_STOLEN;
841
842 tx_error_icmp:
843         dst_link_failure(skb);
844 tx_error:
845         kfree_skb(skb);
846         LeaveFunction(10);
847         return NF_STOLEN;
848 }
849 #endif
850
851
852 /*
853  *      ICMP packet transmitter
854  *      called by the ip_vs_in_icmp
855  */
856 int
857 ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
858                 struct ip_vs_protocol *pp, int offset)
859 {
860         struct rtable   *rt;    /* Route to the other host */
861         int mtu;
862         int rc;
863
864         EnterFunction(10);
865
866         /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
867            forwarded directly here, because there is no need to
868            translate address/port back */
869         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
870                 if (cp->packet_xmit)
871                         rc = cp->packet_xmit(skb, cp, pp);
872                 else
873                         rc = NF_ACCEPT;
874                 /* do not touch skb anymore */
875                 atomic_inc(&cp->in_pkts);
876                 goto out;
877         }
878
879         /*
880          * mangle and send the packet here (only for VS/NAT)
881          */
882
883         if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
884                 goto tx_error_icmp;
885
886         /* MTU checking */
887         mtu = dst_mtu(&rt->u.dst);
888         if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
889                 ip_rt_put(rt);
890                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
891                 IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
892                 goto tx_error;
893         }
894
895         /* copy-on-write the packet before mangling it */
896         if (!skb_make_writable(skb, offset))
897                 goto tx_error_put;
898
899         if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
900                 goto tx_error_put;
901
902         /* drop the old route when skb is not shared */
903         skb_dst_drop(skb);
904         skb_dst_set(skb, &rt->u.dst);
905
906         ip_vs_nat_icmp(skb, pp, cp, 0);
907
908         /* Another hack: avoid icmp_send in ip_fragment */
909         skb->local_df = 1;
910
911         IP_VS_XMIT(PF_INET, skb, rt);
912
913         rc = NF_STOLEN;
914         goto out;
915
916   tx_error_icmp:
917         dst_link_failure(skb);
918   tx_error:
919         dev_kfree_skb(skb);
920         rc = NF_STOLEN;
921   out:
922         LeaveFunction(10);
923         return rc;
924   tx_error_put:
925         ip_rt_put(rt);
926         goto tx_error;
927 }
928
929 #ifdef CONFIG_IP_VS_IPV6
930 int
931 ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
932                 struct ip_vs_protocol *pp, int offset)
933 {
934         struct rt6_info *rt;    /* Route to the other host */
935         int mtu;
936         int rc;
937
938         EnterFunction(10);
939
940         /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
941            forwarded directly here, because there is no need to
942            translate address/port back */
943         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
944                 if (cp->packet_xmit)
945                         rc = cp->packet_xmit(skb, cp, pp);
946                 else
947                         rc = NF_ACCEPT;
948                 /* do not touch skb anymore */
949                 atomic_inc(&cp->in_pkts);
950                 goto out;
951         }
952
953         /*
954          * mangle and send the packet here (only for VS/NAT)
955          */
956
957         rt = __ip_vs_get_out_rt_v6(cp);
958         if (!rt)
959                 goto tx_error_icmp;
960
961         /* MTU checking */
962         mtu = dst_mtu(&rt->u.dst);
963         if (skb->len > mtu) {
964                 dst_release(&rt->u.dst);
965                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
966                 IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
967                 goto tx_error;
968         }
969
970         /* copy-on-write the packet before mangling it */
971         if (!skb_make_writable(skb, offset))
972                 goto tx_error_put;
973
974         if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
975                 goto tx_error_put;
976
977         /* drop the old route when skb is not shared */
978         skb_dst_drop(skb);
979         skb_dst_set(skb, &rt->u.dst);
980
981         ip_vs_nat_icmp_v6(skb, pp, cp, 0);
982
983         /* Another hack: avoid icmp_send in ip_fragment */
984         skb->local_df = 1;
985
986         IP_VS_XMIT(PF_INET6, skb, rt);
987
988         rc = NF_STOLEN;
989         goto out;
990
991 tx_error_icmp:
992         dst_link_failure(skb);
993 tx_error:
994         dev_kfree_skb(skb);
995         rc = NF_STOLEN;
996 out:
997         LeaveFunction(10);
998         return rc;
999 tx_error_put:
1000         dst_release(&rt->u.dst);
1001         goto tx_error;
1002 }
1003 #endif