udpv6: Fix gso_size setting in ip6_ufo_append_data
[safe/jmp/linux-2.6] / net / ipv6 / ip6_output.c
1 /*
2  *      IPv6 output functions
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      Based on linux/net/ipv4/ip_output.c
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  *
15  *      Changes:
16  *      A.N.Kuznetsov   :       airthmetics in fragmentation.
17  *                              extension headers are implemented.
18  *                              route changes now work.
19  *                              ip6_forward does not confuse sniffers.
20  *                              etc.
21  *
22  *      H. von Brand    :       Added missing #include <linux/string.h>
23  *      Imran Patel     :       frag id should be in NBO
24  *      Kazunori MIYAZAWA @USAGI
25  *                      :       add ip6_append_data and related functions
26  *                              for datagram xmit
27  */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40
41 #include <linux/netfilter.h>
42 #include <linux/netfilter_ipv6.h>
43
44 #include <net/sock.h>
45 #include <net/snmp.h>
46
47 #include <net/ipv6.h>
48 #include <net/ndisc.h>
49 #include <net/protocol.h>
50 #include <net/ip6_route.h>
51 #include <net/addrconf.h>
52 #include <net/rawv6.h>
53 #include <net/icmp.h>
54 #include <net/xfrm.h>
55 #include <net/checksum.h>
56 #include <linux/mroute6.h>
57
58 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
59
60 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
61 {
62         static u32 ipv6_fragmentation_id = 1;
63         static DEFINE_SPINLOCK(ip6_id_lock);
64
65         spin_lock_bh(&ip6_id_lock);
66         fhdr->identification = htonl(ipv6_fragmentation_id);
67         if (++ipv6_fragmentation_id == 0)
68                 ipv6_fragmentation_id = 1;
69         spin_unlock_bh(&ip6_id_lock);
70 }
71
72 int __ip6_local_out(struct sk_buff *skb)
73 {
74         int len;
75
76         len = skb->len - sizeof(struct ipv6hdr);
77         if (len > IPV6_MAXPLEN)
78                 len = 0;
79         ipv6_hdr(skb)->payload_len = htons(len);
80
81         return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
82                        dst_output);
83 }
84
/*
 * Send a locally generated packet: __ip6_local_out() runs the netfilter
 * hook; a return of 1 means "accepted, continue", so we then hand the
 * packet to dst_output().  Any other value is passed back unchanged.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int rc = __ip6_local_out(skb);

	if (likely(rc == 1))
		rc = dst_output(skb);

	return rc;
}
EXPORT_SYMBOL_GPL(ip6_local_out);
96
97 static int ip6_output_finish(struct sk_buff *skb)
98 {
99         struct dst_entry *dst = skb_dst(skb);
100
101         if (dst->hh)
102                 return neigh_hh_output(dst->hh, skb);
103         else if (dst->neighbour)
104                 return dst->neighbour->output(skb);
105
106         IP6_INC_STATS_BH(dev_net(dst->dev),
107                          ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
108         kfree_skb(skb);
109         return -EINVAL;
110
111 }
112
/* dev_loopback_xmit for use with netfilter.
 * Re-injects @newskb into the local receive path via netif_rx() so a
 * looped-back copy (e.g. of a multicast packet) is delivered locally.
 */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	/* Locally generated data needs no checksum verification. */
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	/* A dst must already be attached at this point. */
	WARN_ON(!skb_dst(newskb));

	netif_rx(newskb);
	return 0;
}
125
126
/*
 * Post-routing output: stamps protocol/device, loops a copy of multicast
 * packets back to local listeners when required, updates multicast
 * counters, and finally runs the POST_ROUTING netfilter hook with
 * ip6_output_finish as the continuation.
 */
static int ip6_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a clone back locally when the socket has multicast
		 * loopback enabled (or has no socket) and either a
		 * multicast-router socket exists for a not-yet-forwarded
		 * packet or a local listener has joined the group. */
		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
		    ((mroute6_socket(dev_net(dev)) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
					NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			/* Hop limit 0: the packet must not leave this node;
			 * the looped copy above is all that gets delivered. */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				skb->len);
	}

	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
		       ip6_output_finish);
}
169
170 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
171 {
172         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
173
174         return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
175                skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
176 }
177
178 int ip6_output(struct sk_buff *skb)
179 {
180         struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
181         if (unlikely(idev->cnf.disable_ipv6)) {
182                 IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev,
183                               IPSTATS_MIB_OUTDISCARDS);
184                 kfree_skb(skb);
185                 return 0;
186         }
187
188         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
189                                 dst_allfrag(skb_dst(skb)))
190                 return ip6_fragment(skb, ip6_output2);
191         else
192                 return ip6_output2(skb);
193 }
194
/*
 *	xmit an sk_buff (used by TCP)
 *
 *	Prepends any extension headers from @opt (reallocating headroom if
 *	necessary), builds the IPv6 header from the socket and @fl, and
 *	passes the packet through the LOCAL_OUT netfilter hook to
 *	dst_output().  Packets larger than the path MTU that are neither
 *	locally fraggable (@ipfragok) nor GSO are answered with an
 *	ICMPV6_PKT_TOOBIG to the sender and -EMSGSIZE.
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl->proto;
	int seg_len = skb->len;
	int hlimit, tclass;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			/* Re-charge the reallocated skb to the owning socket. */
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		/* Fragmentable options go first, then non-fragmentable ones
		 * (which may also rewrite the first hop, e.g. routing hdr). */
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/* Allow local fragmentation. */
	if (ipfragok)
		skb->local_df = 1;

	/*
	 *	Fill in the IPv6 header
	 */

	/* Hop limit: socket value when set, else the route's default. */
	hlimit = -1;
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	tclass = -1;
	if (np)
		tclass = np->tclass;
	if (tclass < 0)
		tclass = 0;

	/* First 32 bits: version 6 | traffic class | flow label. */
	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
				dst_output);
	}

	/* Too big and not allowed to fragment: notify the sender. */
	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);
295
296 /*
297  *      To avoid extra problems ND packets are send through this
298  *      routine. It's code duplication but I really want to avoid
299  *      extra checks since ipv6_build_header is used by TCP (which
300  *      is for us performance critical)
301  */
302
303 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
304                const struct in6_addr *saddr, const struct in6_addr *daddr,
305                int proto, int len)
306 {
307         struct ipv6_pinfo *np = inet6_sk(sk);
308         struct ipv6hdr *hdr;
309         int totlen;
310
311         skb->protocol = htons(ETH_P_IPV6);
312         skb->dev = dev;
313
314         totlen = len + sizeof(struct ipv6hdr);
315
316         skb_reset_network_header(skb);
317         skb_put(skb, sizeof(struct ipv6hdr));
318         hdr = ipv6_hdr(skb);
319
320         *(__be32*)hdr = htonl(0x60000000);
321
322         hdr->payload_len = htons(len);
323         hdr->nexthdr = proto;
324         hdr->hop_limit = np->hop_limit;
325
326         ipv6_addr_copy(&hdr->saddr, saddr);
327         ipv6_addr_copy(&hdr->daddr, daddr);
328
329         return 0;
330 }
331
/*
 * Deliver a Router Alert packet to every socket registered on
 * ip6_ra_chain whose sel value matches and whose bound device (if any)
 * matches the receiving device.  Every match except the last gets a
 * clone; the last match consumes the original skb.  Returns 1 when the
 * skb was delivered (consumed), 0 when no socket matched.
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			/* Hand a clone to the previous match so the original
			 * skb can be given to the final one below. */
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
360
/*
 * Classify a packet whose destination is being proxied: returns 1 when
 * it is an ICMPv6 neighbour-discovery message that must be handed to
 * local input, -1 when it must be discarded (link-local destination,
 * after signalling link failure), and 0 when normal forwarding applies.
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	/* Walk past any extension headers to the upper-layer protocol. */
	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Make sure the ICMPv6 type octet is in the linear area. */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
411
/* Final forwarding step after the FORWARD netfilter hook: hand the
 * packet to the route's output path. */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
416
/*
 * Forward a received IPv6 packet: enforce forwarding/xfrm policy,
 * deliver Router Alert packets to registered sockets, check and
 * decrement the hop limit, handle NDP proxying, emit redirects where
 * appropriate, enforce the egress MTU, then pass the packet through the
 * FORWARD netfilter hook to ip6_forward_finish().  Returns 0 on
 * success/consumption, a negative errno on drop.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		/* ptr[2..3] is the 16-bit Router Alert value. */
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			    0, skb->dev);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm6_route_forward() may have replaced the route. */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr*)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
			goto error;
		}
	}

	if (skb->len > dst_mtu(dst)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	/* skb_cow() may have copied the header; re-fetch it. */
	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
563
/*
 * Copy per-packet metadata (packet type, priority, protocol, dst,
 * device, mark, traffic-control index, netfilter and security state)
 * from @from into @to.  Used by ip6_fragment() so each fragment carries
 * the original packet's state.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	/* Release any dst already on 'to' before attaching a clone. */
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}
584
/*
 * Find the offset (from the network header) at which a Fragment header
 * must be inserted: past Hop-by-Hop and Routing headers, and past a
 * Destination Options header only when it precedes a Routing header
 * (or carries a Home Address option with MIPv6).  On return *nexthdr
 * points at the nexthdr byte to be rewritten to NEXTHDR_FRAGMENT.
 */
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	/* NOTE(review): offset is u16 while packet_len is unsigned int; an
	 * extremely long extension-header chain could wrap offset — confirm
	 * callers bound the header length. */
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			/* A Home Address option keeps scanning past this
			 * Destination Options header. */
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default :
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}
623
624 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
625 {
626         struct sk_buff *frag;
627         struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
628         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
629         struct ipv6hdr *tmp_hdr;
630         struct frag_hdr *fh;
631         unsigned int mtu, hlen, left, len;
632         __be32 frag_id = 0;
633         int ptr, offset = 0, err=0;
634         u8 *prevhdr, nexthdr = 0;
635         struct net *net = dev_net(skb_dst(skb)->dev);
636
637         hlen = ip6_find_1stfragopt(skb, &prevhdr);
638         nexthdr = *prevhdr;
639
640         mtu = ip6_skb_dst_mtu(skb);
641
642         /* We must not fragment if the socket is set to force MTU discovery
643          * or if the skb it not generated by a local socket.  (This last
644          * check should be redundant, but it's free.)
645          */
646         if (!skb->local_df) {
647                 skb->dev = skb_dst(skb)->dev;
648                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
649                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
650                               IPSTATS_MIB_FRAGFAILS);
651                 kfree_skb(skb);
652                 return -EMSGSIZE;
653         }
654
655         if (np && np->frag_size < mtu) {
656                 if (np->frag_size)
657                         mtu = np->frag_size;
658         }
659         mtu -= hlen + sizeof(struct frag_hdr);
660
661         if (skb_has_frags(skb)) {
662                 int first_len = skb_pagelen(skb);
663                 int truesizes = 0;
664
665                 if (first_len - hlen > mtu ||
666                     ((first_len - hlen) & 7) ||
667                     skb_cloned(skb))
668                         goto slow_path;
669
670                 skb_walk_frags(skb, frag) {
671                         /* Correct geometry. */
672                         if (frag->len > mtu ||
673                             ((frag->len & 7) && frag->next) ||
674                             skb_headroom(frag) < hlen)
675                             goto slow_path;
676
677                         /* Partially cloned skb? */
678                         if (skb_shared(frag))
679                                 goto slow_path;
680
681                         BUG_ON(frag->sk);
682                         if (skb->sk) {
683                                 frag->sk = skb->sk;
684                                 frag->destructor = sock_wfree;
685                                 truesizes += frag->truesize;
686                         }
687                 }
688
689                 err = 0;
690                 offset = 0;
691                 frag = skb_shinfo(skb)->frag_list;
692                 skb_frag_list_init(skb);
693                 /* BUILD HEADER */
694
695                 *prevhdr = NEXTHDR_FRAGMENT;
696                 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
697                 if (!tmp_hdr) {
698                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
699                                       IPSTATS_MIB_FRAGFAILS);
700                         return -ENOMEM;
701                 }
702
703                 __skb_pull(skb, hlen);
704                 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
705                 __skb_push(skb, hlen);
706                 skb_reset_network_header(skb);
707                 memcpy(skb_network_header(skb), tmp_hdr, hlen);
708
709                 ipv6_select_ident(skb, fh);
710                 fh->nexthdr = nexthdr;
711                 fh->reserved = 0;
712                 fh->frag_off = htons(IP6_MF);
713                 frag_id = fh->identification;
714
715                 first_len = skb_pagelen(skb);
716                 skb->data_len = first_len - skb_headlen(skb);
717                 skb->truesize -= truesizes;
718                 skb->len = first_len;
719                 ipv6_hdr(skb)->payload_len = htons(first_len -
720                                                    sizeof(struct ipv6hdr));
721
722                 dst_hold(&rt->u.dst);
723
724                 for (;;) {
725                         /* Prepare header of the next frame,
726                          * before previous one went down. */
727                         if (frag) {
728                                 frag->ip_summed = CHECKSUM_NONE;
729                                 skb_reset_transport_header(frag);
730                                 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
731                                 __skb_push(frag, hlen);
732                                 skb_reset_network_header(frag);
733                                 memcpy(skb_network_header(frag), tmp_hdr,
734                                        hlen);
735                                 offset += skb->len - hlen - sizeof(struct frag_hdr);
736                                 fh->nexthdr = nexthdr;
737                                 fh->reserved = 0;
738                                 fh->frag_off = htons(offset);
739                                 if (frag->next != NULL)
740                                         fh->frag_off |= htons(IP6_MF);
741                                 fh->identification = frag_id;
742                                 ipv6_hdr(frag)->payload_len =
743                                                 htons(frag->len -
744                                                       sizeof(struct ipv6hdr));
745                                 ip6_copy_metadata(frag, skb);
746                         }
747
748                         err = output(skb);
749                         if(!err)
750                                 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
751                                               IPSTATS_MIB_FRAGCREATES);
752
753                         if (err || !frag)
754                                 break;
755
756                         skb = frag;
757                         frag = skb->next;
758                         skb->next = NULL;
759                 }
760
761                 kfree(tmp_hdr);
762
763                 if (err == 0) {
764                         IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
765                                       IPSTATS_MIB_FRAGOKS);
766                         dst_release(&rt->u.dst);
767                         return 0;
768                 }
769
770                 while (frag) {
771                         skb = frag->next;
772                         kfree_skb(frag);
773                         frag = skb;
774                 }
775
776                 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
777                               IPSTATS_MIB_FRAGFAILS);
778                 dst_release(&rt->u.dst);
779                 return err;
780         }
781
782 slow_path:
783         left = skb->len - hlen;         /* Space per frame */
784         ptr = hlen;                     /* Where to start from */
785
786         /*
787          *      Fragment the datagram.
788          */
789
790         *prevhdr = NEXTHDR_FRAGMENT;
791
792         /*
793          *      Keep copying data until we run out.
794          */
795         while(left > 0) {
796                 len = left;
797                 /* IF: it doesn't fit, use 'mtu' - the data space left */
798                 if (len > mtu)
799                         len = mtu;
800                 /* IF: we are not sending upto and including the packet end
801                    then align the next start on an eight byte boundary */
802                 if (len < left) {
803                         len &= ~7;
804                 }
805                 /*
806                  *      Allocate buffer.
807                  */
808
809                 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
810                         NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
811                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
812                                       IPSTATS_MIB_FRAGFAILS);
813                         err = -ENOMEM;
814                         goto fail;
815                 }
816
817                 /*
818                  *      Set up data on packet
819                  */
820
821                 ip6_copy_metadata(frag, skb);
822                 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
823                 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
824                 skb_reset_network_header(frag);
825                 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
826                 frag->transport_header = (frag->network_header + hlen +
827                                           sizeof(struct frag_hdr));
828
829                 /*
830                  *      Charge the memory for the fragment to any owner
831                  *      it might possess
832                  */
833                 if (skb->sk)
834                         skb_set_owner_w(frag, skb->sk);
835
836                 /*
837                  *      Copy the packet header into the new buffer.
838                  */
839                 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
840
841                 /*
842                  *      Build fragment header.
843                  */
844                 fh->nexthdr = nexthdr;
845                 fh->reserved = 0;
846                 if (!frag_id) {
847                         ipv6_select_ident(skb, fh);
848                         frag_id = fh->identification;
849                 } else
850                         fh->identification = frag_id;
851
852                 /*
853                  *      Copy a block of the IP datagram.
854                  */
855                 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
856                         BUG();
857                 left -= len;
858
859                 fh->frag_off = htons(offset);
860                 if (left > 0)
861                         fh->frag_off |= htons(IP6_MF);
862                 ipv6_hdr(frag)->payload_len = htons(frag->len -
863                                                     sizeof(struct ipv6hdr));
864
865                 ptr += len;
866                 offset += len;
867
868                 /*
869                  *      Put this fragment into the sending queue.
870                  */
871                 err = output(frag);
872                 if (err)
873                         goto fail;
874
875                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
876                               IPSTATS_MIB_FRAGCREATES);
877         }
878         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
879                       IPSTATS_MIB_FRAGOKS);
880         kfree_skb(skb);
881         return err;
882
883 fail:
884         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
885                       IPSTATS_MIB_FRAGFAILS);
886         kfree_skb(skb);
887         return err;
888 }
889
890 static inline int ip6_rt_check(struct rt6key *rt_key,
891                                struct in6_addr *fl_addr,
892                                struct in6_addr *addr_cache)
893 {
894         return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
895                 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
896 }
897
/*
 *	ip6_sk_dst_check - validate a socket's cached dst against a flow
 *
 *	Returns @dst if it is still usable for @fl; otherwise releases it
 *	and returns NULL so the caller performs a fresh route lookup.
 *	A NULL @dst is passed straight through.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the not-connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE            --ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
		/* Cached route no longer matches the flow: drop our
		 * reference and make the caller re-resolve. */
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
937
/*
 *	ip6_dst_lookup_tail - common tail of the dst-lookup helpers
 *
 *	Fills *@dst with a route for @fl unless the caller already
 *	provided one, then selects a source address when the flow has
 *	none.  With CONFIG_IPV6_OPTIMISTIC_DAD the found dst may be
 *	replaced by the default router's dst while our source address is
 *	still optimistic.  On failure *@dst is released and set to NULL.
 *	Returns 0 on success or a negative errno.
 */
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;
	struct net *net = sock_net(sk);

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		/* No source address supplied: choose one suited to the
		 * destination, honouring the socket's srcprefs. */
		err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
					 &fl->fl6_dst,
					 sk ? inet6_sk(sk)->srcprefs : 0,
					 &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			/* Zeroed destination makes the lookup resolve to
			 * the default route. */
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}
1004
1005 /**
1006  *      ip6_dst_lookup - perform route lookup on flow
1007  *      @sk: socket which provides route info
1008  *      @dst: pointer to dst_entry * for result
1009  *      @fl: flow to lookup
1010  *
1011  *      This function performs a route lookup on the given flow.
1012  *
1013  *      It returns zero on success, or a standard errno code on error.
1014  */
1015 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1016 {
1017         *dst = NULL;
1018         return ip6_dst_lookup_tail(sk, dst, fl);
1019 }
1020 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1021
1022 /**
1023  *      ip6_sk_dst_lookup - perform socket cached route lookup on flow
1024  *      @sk: socket which provides the dst cache and route info
1025  *      @dst: pointer to dst_entry * for result
1026  *      @fl: flow to lookup
1027  *
1028  *      This function performs a route lookup on the given flow with the
1029  *      possibility of using the cached route in the socket if it is valid.
1030  *      It will take the socket dst lock when operating on the dst cache.
1031  *      As a result, this function can only be used in process context.
1032  *
1033  *      It returns zero on success, or a standard errno code on error.
1034  */
1035 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1036 {
1037         *dst = NULL;
1038         if (sk) {
1039                 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1040                 *dst = ip6_sk_dst_check(sk, *dst, fl);
1041         }
1042
1043         return ip6_dst_lookup_tail(sk, dst, fl);
1044 }
1045 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1046
1047 static inline int ip6_ufo_append_data(struct sock *sk,
1048                         int getfrag(void *from, char *to, int offset, int len,
1049                         int odd, struct sk_buff *skb),
1050                         void *from, int length, int hh_len, int fragheaderlen,
1051                         int transhdrlen, int mtu,unsigned int flags)
1052
1053 {
1054         struct sk_buff *skb;
1055         int err;
1056
1057         /* There is support for UDP large send offload by network
1058          * device, so create one single skb packet containing complete
1059          * udp datagram
1060          */
1061         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1062                 skb = sock_alloc_send_skb(sk,
1063                         hh_len + fragheaderlen + transhdrlen + 20,
1064                         (flags & MSG_DONTWAIT), &err);
1065                 if (skb == NULL)
1066                         return -ENOMEM;
1067
1068                 /* reserve space for Hardware header */
1069                 skb_reserve(skb, hh_len);
1070
1071                 /* create space for UDP/IP header */
1072                 skb_put(skb,fragheaderlen + transhdrlen);
1073
1074                 /* initialize network header pointer */
1075                 skb_reset_network_header(skb);
1076
1077                 /* initialize protocol header pointer */
1078                 skb->transport_header = skb->network_header + fragheaderlen;
1079
1080                 skb->ip_summed = CHECKSUM_PARTIAL;
1081                 skb->csum = 0;
1082                 sk->sk_sndmsg_off = 0;
1083         }
1084
1085         err = skb_append_datato_frags(sk,skb, getfrag, from,
1086                                       (length - transhdrlen));
1087         if (!err) {
1088                 struct frag_hdr fhdr;
1089
1090                 /* Specify the length of each IPv6 datagram fragment.
1091                  * It has to be a multiple of 8.
1092                  */
1093                 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1094                                              sizeof(struct frag_hdr)) & ~7;
1095                 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1096                 ipv6_select_ident(skb, &fhdr);
1097                 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1098                 __skb_queue_tail(&sk->sk_write_queue, skb);
1099
1100                 return 0;
1101         }
1102         /* There is not enough support do UPD LSO,
1103          * so follow normal path
1104          */
1105         kfree_skb(skb);
1106
1107         return err;
1108 }
1109
1110 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1111                                                gfp_t gfp)
1112 {
1113         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1114 }
1115
1116 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1117                                                 gfp_t gfp)
1118 {
1119         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1120 }
1121
/**
 *	ip6_append_data - append data to the socket's pending (corked) queue
 *	@sk: socket sending the data
 *	@getfrag: callback that copies @len user bytes at @offset from
 *		  @from into @to; @odd is the running checksum offset
 *	@from: opaque cookie handed to @getfrag
 *	@length: bytes to append (includes the transport header on the
 *		 first call for a datagram)
 *	@transhdrlen: transport header length; non-zero only on the first
 *		      call for a datagram
 *	@hlimit: hop limit for the datagram
 *	@tclass: traffic class for the datagram
 *	@opt: extension-header options, duplicated into the cork (may be NULL)
 *	@fl: flow describing the destination
 *	@rt: route for the destination
 *	@flags: MSG_* flags (MSG_MORE, MSG_DONTWAIT, MSG_PROBE, ...)
 *
 *	Builds MTU-sized skbs on sk->sk_write_queue; they are transmitted
 *	later by ip6_push_pending_frames() or discarded by
 *	ip6_flush_pending_frames().  Returns 0 on success or a negative
 *	errno.
 */
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			/* NOTE(review): if a later dup fails, the headers
			 * duplicated so far stay in np->cork.opt; they are
			 * reclaimed by ip6_cork_release() when the caller
			 * flushes the socket. */
			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		dst_hold(&rt->u.dst);
		inet->cork.dst = &rt->u.dst;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		/* With IPV6_PMTUDISC_PROBE use the device MTU instead of
		 * the (possibly smaller) cached path MTU. */
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		/* A non-zero np->frag_size below the MTU overrides it
		 * (presumably set via a socket option — not visible in
		 * this file; confirm against ipv6_setsockopt). */
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		/* Subsequent call: reuse the parameters saved in the cork. */
		rt = (struct rt6_info *)inet->cork.dst;
		fl = &inet->cork.fl;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	/* Per-fragment header overhead: IPv6 header plus non-fragmentable
	 * extension headers. */
	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* Largest fragment boundary: payload rounded down to a multiple
	 * of 8, with room kept for a fragment header. */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		/* Device does UDP fragmentation offload: keep the whole
		 * datagram in one skb instead of fragmenting here. */
		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					  fragheaderlen, transhdrlen, mtu,
					  flags);
		if (err)
			goto error;
		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				/* First skb of the datagram: may block. */
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Follow-up skbs: allocate only while the
				 * send buffer is not overcommitted. */
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len+sizeof(struct frag_hdr));

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the tail of the previous skb past
				 * the fragment boundary into this one,
				 * keeping the running checksums correct. */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: copy linearly into the skb. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: append into page fragments,
			 * reusing the socket's current send page when
			 * there is room left in it. */
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if(i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	/* Roll the accounting back by the bytes we failed to append. */
	inet->cork.length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
1440
1441 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1442 {
1443         if (np->cork.opt) {
1444                 kfree(np->cork.opt->dst0opt);
1445                 kfree(np->cork.opt->dst1opt);
1446                 kfree(np->cork.opt->hopopt);
1447                 kfree(np->cork.opt->srcrt);
1448                 kfree(np->cork.opt);
1449                 np->cork.opt = NULL;
1450         }
1451
1452         if (inet->cork.dst) {
1453                 dst_release(inet->cork.dst);
1454                 inet->cork.dst = NULL;
1455                 inet->cork.flags &= ~IPCORK_ALLFRAG;
1456         }
1457         memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1458 }
1459
/**
 *	ip6_push_pending_frames - transmit the datagram queued by ip6_append_data
 *	@sk: socket whose pending write queue is sent
 *
 *	Collapses all queued skbs into one (follow-up skbs become the
 *	frag_list of the first), prepends the extension headers and the
 *	IPv6 header, updates SNMP counters and hands the packet to
 *	ip6_local_out().  The cork state is released on every exit path.
 *	Returns 0 on success or a negative errno.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain every remaining queued skb onto the first one's
	 * frag_list, transferring byte counts and socket references. */
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	/* final_dst may be rewritten by a routing header below. */
	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/* First word: version 6, traffic class from the cork, and the
	 * caller's flow label. */
	*(__be32*)hdr = fl->fl6_flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->u.dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		/* Positive values are congestion (NET_XMIT) codes; report
		 * them only if the socket asked for errors (recverr). */
		if (err > 0)
			err = np->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	goto out;
}
1543
1544 void ip6_flush_pending_frames(struct sock *sk)
1545 {
1546         struct sk_buff *skb;
1547
1548         while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1549                 if (skb_dst(skb))
1550                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1551                                       IPSTATS_MIB_OUTDISCARDS);
1552                 kfree_skb(skb);
1553         }
1554
1555         ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1556 }