X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fipv6%2Fip6_output.c;h=31dafaf0b652d7fc1ff965a092884bb43748d1fe;hb=e1931b784a8de324abf310fa3b5e3f25d3988233;hp=590d2b797197f92c8e214396dbebdb2c5498a5be;hpb=6869c4d8e066e21623c812c448a05f1ed931c9c6;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 590d2b7..31dafaf 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1,9 +1,9 @@ /* * IPv6 output functions - * Linux INET6 implementation + * Linux INET6 implementation * * Authors: - * Pedro Roque + * Pedro Roque * * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $ * @@ -28,7 +28,6 @@ * for datagram xmit */ -#include #include #include #include @@ -39,6 +38,7 @@ #include #include #include +#include #include #include @@ -72,23 +72,14 @@ static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *f static inline int ip6_output_finish(struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; - struct hh_cache *hh = dst->hh; - - if (hh) { - int hh_alen; - - read_lock_bh(&hh->hh_lock); - hh_alen = HH_DATA_ALIGN(hh->hh_len); - memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); - read_unlock_bh(&hh->hh_lock); - skb_push(skb, hh->hh_len); - return hh->hh_output(skb); - } else if (dst->neighbour) + + if (dst->hh) + return neigh_hh_output(dst->hh, skb); + else if (dst->neighbour) return dst->neighbour->output(skb); - IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; @@ -97,8 +88,8 @@ static inline int ip6_output_finish(struct sk_buff *skb) /* dev_loopback_xmit for use with netfilter. */ static int ip6_dev_loopback_xmit(struct sk_buff *newskb) { - newskb->mac.raw = newskb->data; - __skb_pull(newskb, newskb->nh.raw - newskb->data); + skb_reset_mac_header(newskb); + __skb_pull(newskb, skb_network_offset(newskb)); newskb->pkt_type = PACKET_LOOPBACK; newskb->ip_summed = CHECKSUM_UNNECESSARY; BUG_TRAP(newskb->dst); @@ -116,12 +107,13 @@ static int ip6_output2(struct sk_buff *skb) skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; - if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) { + if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL; + struct inet6_dev *idev = ip6_dst_idev(skb->dst); if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) && - ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr, - &skb->nh.ipv6h->saddr)) { + ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, + &ipv6_hdr(skb)->saddr)) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); /* Do not check for IFF_ALLMULTI; multicast routing @@ -132,59 +124,36 @@ static int ip6_output2(struct sk_buff *skb) newskb->dev, ip6_dev_loopback_xmit); - if (skb->nh.ipv6h->hop_limit == 0) { - IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + if (ipv6_hdr(skb)->hop_limit == 0) { + IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return 0; } } - IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS); + IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS); } return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish); } +static inline int ip6_skb_dst_mtu(struct sk_buff *skb) +{ + struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; + + return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ? + skb->dst->dev->mtu : dst_mtu(skb->dst); +} + int ip6_output(struct sk_buff *skb) { - if (skb->len > dst_mtu(skb->dst) || dst_allfrag(skb->dst)) + if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || + dst_allfrag(skb->dst)) return ip6_fragment(skb, ip6_output2); else return ip6_output2(skb); } -#ifdef CONFIG_NETFILTER -int ip6_route_me_harder(struct sk_buff *skb) -{ - struct ipv6hdr *iph = skb->nh.ipv6h; - struct dst_entry *dst; - struct flowi fl = { - .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, - .nl_u = - { .ip6_u = - { .daddr = iph->daddr, - .saddr = iph->saddr, } }, - .proto = iph->nexthdr, - }; - - dst = ip6_route_output(skb->sk, &fl); - - if (dst->error) { - IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); - LIMIT_NETDEBUG( - printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n")); - dst_release(dst); - return -EINVAL; - } - - /* Drop old route. */ - dst_release(skb->dst); - - skb->dst = dst; - return 0; -} -#endif - /* * xmit an sk_buff (used by TCP) */ @@ -192,13 +161,13 @@ int ip6_route_me_harder(struct sk_buff *skb) int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, struct ipv6_txoptions *opt, int ipfragok) { - struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL; + struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl->fl6_dst; struct dst_entry *dst = skb->dst; struct ipv6hdr *hdr; u8 proto = fl->proto; int seg_len = skb->len; - int hlimit; + int hlimit, tclass; u32 mtu; if (opt) { @@ -213,12 +182,14 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if (skb_headroom(skb) < head_room) { struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); - kfree_skb(skb); - skb = skb2; - if (skb == NULL) { - IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + if (skb2 == NULL) { + IP6_INC_STATS(ip6_dst_idev(skb->dst), + IPSTATS_MIB_OUTDISCARDS); + kfree_skb(skb); return -ENOBUFS; } + kfree_skb(skb); + skb = skb2; if (sk) skb_set_owner_w(skb, sk); } @@ -228,13 +199,14 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); } - hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr)); + skb_push(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + hdr = ipv6_hdr(skb); /* * Fill in the IPv6 header */ - *(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel; hlimit = -1; if (np) hlimit = np->hop_limit; @@ -243,6 +215,14 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if (hlimit < 0) hlimit = ipv6_get_hoplimit(dst->dev); + tclass = -1; + if (np) + tclass = np->tclass; + if (tclass < 0) + tclass = 0; + + *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel; + hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; hdr->hop_limit = hlimit; @@ -250,9 +230,12 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); ipv6_addr_copy(&hdr->daddr, first_hop); + skb->priority = sk->sk_priority; + mtu = dst_mtu(dst); - if ((skb->len <= mtu) || ipfragok) { - IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); + if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) { + IP6_INC_STATS(ip6_dst_idev(skb->dst), + IPSTATS_MIB_OUTREQUESTS); return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output); } @@ -261,11 +244,13 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); - IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } +EXPORT_SYMBOL(ip6_xmit); + /* * To avoid extra problems ND packets are send through this * routine. It's code duplication but I really want to avoid @@ -286,10 +271,11 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, totlen = len + sizeof(struct ipv6hdr); - hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr)); - skb->nh.ipv6h = hdr; + skb_reset_network_header(skb); + skb_put(skb, sizeof(struct ipv6hdr)); + hdr = ipv6_hdr(skb); - *(u32*)hdr = htonl(0x60000000); + *(__be32*)hdr = htonl(0x60000000); hdr->payload_len = htons(len); hdr->nexthdr = proto; @@ -309,7 +295,9 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel) read_lock(&ip6_ra_lock); for (ra = ip6_ra_chain; ra; ra = ra->next) { struct sock *sk = ra->sk; - if (sk && ra->sel == sel) { + if (sk && ra->sel == sel && + (!sk->sk_bound_dev_if || + sk->sk_bound_dev_if == skb->dev->ifindex)) { if (last) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) @@ -328,6 +316,57 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel) return 0; } +static int ip6_forward_proxy_check(struct sk_buff *skb) +{ + struct ipv6hdr *hdr = ipv6_hdr(skb); + u8 nexthdr = hdr->nexthdr; + int offset; + + if (ipv6_ext_hdr(nexthdr)) { + offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr); + if (offset < 0) + return 0; + } else + offset = sizeof(struct ipv6hdr); + + if (nexthdr == IPPROTO_ICMPV6) { + struct icmp6hdr *icmp6; + + if (!pskb_may_pull(skb, (skb_network_header(skb) + + offset + 1 - skb->data))) + return 0; + + icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); + + switch (icmp6->icmp6_type) { + case NDISC_ROUTER_SOLICITATION: + case NDISC_ROUTER_ADVERTISEMENT: + case NDISC_NEIGHBOUR_SOLICITATION: + case NDISC_NEIGHBOUR_ADVERTISEMENT: + case NDISC_REDIRECT: + /* For reaction involving unicast neighbor discovery + * message destined to the proxied address, pass it to + * input function. + */ + return 1; + default: + break; + } + } + + /* + * The proxying router can't forward traffic sent to a link-local + * address, so signal the sender and discard the packet. This + * behavior is clarified by the MIPv6 specification. + */ + if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { + dst_link_failure(skb); + return -1; + } + + return 0; +} + static inline int ip6_forward_finish(struct sk_buff *skb) { return dst_output(skb); @@ -336,18 +375,18 @@ static inline int ip6_forward_finish(struct sk_buff *skb) int ip6_forward(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; - struct ipv6hdr *hdr = skb->nh.ipv6h; + struct ipv6hdr *hdr = ipv6_hdr(skb); struct inet6_skb_parm *opt = IP6CB(skb); - + if (ipv6_devconf.forwarding == 0) goto error; if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { - IP6_INC_STATS(IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); goto drop; } - skb->ip_summed = CHECKSUM_NONE; + skb_forward_csum(skb); /* * We DO NOT make any processing on @@ -363,7 +402,7 @@ int ip6_forward(struct sk_buff *skb) * that different fragments will go along one path. --ANK */ if (opt->ra) { - u8 *ptr = skb->nh.raw + opt->ra; + u8 *ptr = skb_network_header(skb) + opt->ra; if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3])) return 0; } @@ -376,13 +415,26 @@ int ip6_forward(struct sk_buff *skb) skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0, skb->dev); + IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -ETIMEDOUT; } + /* XXX: idev->cnf.proxy_ndp? */ + if (ipv6_devconf.proxy_ndp && + pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) { + int proxied = ip6_forward_proxy_check(skb); + if (proxied > 0) + return ip6_input(skb); + else if (proxied < 0) { + IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); + goto drop; + } + } + if (!xfrm6_route_forward(skb)) { - IP6_INC_STATS(IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); goto drop; } dst = skb->dst; @@ -411,38 +463,45 @@ int ip6_forward(struct sk_buff *skb) */ if (xrlim_allow(dst, 1*HZ)) ndisc_send_redirect(skb, n, target); - } else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK - |IPV6_ADDR_LINKLOCAL)) { + } else { + int addrtype = ipv6_addr_type(&hdr->saddr); + /* This check is security critical. */ - goto error; + if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK)) + goto error; + if (addrtype & IPV6_ADDR_LINKLOCAL) { + icmpv6_send(skb, ICMPV6_DEST_UNREACH, + ICMPV6_NOT_NEIGHBOUR, 0, skb->dev); + goto error; + } } if (skb->len > dst_mtu(dst)) { /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev); - IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS); - IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } if (skb_cow(skb, dst->dev->hard_header_len)) { - IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); goto drop; } - hdr = skb->nh.ipv6h; + hdr = ipv6_hdr(skb); /* Mangling hops number delayed to point after skb COW */ - + hdr->hop_limit--; - IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS); + IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish); error: - IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); drop: kfree_skb(skb); return -EINVAL; @@ -456,62 +515,65 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) dst_release(to->dst); to->dst = dst_clone(from->dst); to->dev = from->dev; + to->mark = from->mark; #ifdef CONFIG_NET_SCHED to->tc_index = from->tc_index; #endif -#ifdef CONFIG_NETFILTER - to->nfmark = from->nfmark; - /* Connection association is same as pre-frag packet */ - to->nfct = from->nfct; - nf_conntrack_get(to->nfct); - to->nfctinfo = from->nfctinfo; -#ifdef CONFIG_BRIDGE_NETFILTER - nf_bridge_put(to->nf_bridge); - to->nf_bridge = from->nf_bridge; - nf_bridge_get(to->nf_bridge); -#endif -#endif + nf_copy(to, from); + skb_copy_secmark(to, from); } int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { u16 offset = sizeof(struct ipv6hdr); - struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1); - unsigned int packet_len = skb->tail - skb->nh.raw; + struct ipv6_opt_hdr *exthdr = + (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); + unsigned int packet_len = skb->tail - skb->network_header; int found_rhdr = 0; - *nexthdr = &skb->nh.ipv6h->nexthdr; + *nexthdr = &ipv6_hdr(skb)->nexthdr; while (offset + 1 <= packet_len) { switch (**nexthdr) { case NEXTHDR_HOP: + break; case NEXTHDR_ROUTING: + found_rhdr = 1; + break; case NEXTHDR_DEST: - if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1; - if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset; - offset += ipv6_optlen(exthdr); - *nexthdr = &exthdr->nexthdr; - exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) + break; +#endif + if (found_rhdr) + return offset; break; default : return offset; } + + offset += ipv6_optlen(exthdr); + *nexthdr = &exthdr->nexthdr; + exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + + offset); } return offset; } +EXPORT_SYMBOL_GPL(ip6_find_1stfragopt); static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct net_device *dev; struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info*)skb->dst; + struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; unsigned int mtu, hlen, left, len; - u32 frag_id = 0; + __be32 frag_id = 0; int ptr, offset = 0, err=0; u8 *prevhdr, nexthdr = 0; @@ -519,7 +581,25 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) hlen = ip6_find_1stfragopt(skb, &prevhdr); nexthdr = *prevhdr; - mtu = dst_mtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr); + mtu = ip6_skb_dst_mtu(skb); + + /* We must not fragment if the socket is set to force MTU discovery + * or if the skb it not generated by a local socket. (This last + * check should be redundant, but it's free.) + */ + if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) { + skb->dev = skb->dst->dev; + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); + kfree_skb(skb); + return -EMSGSIZE; + } + + if (np && np->frag_size < mtu) { + if (np->frag_size) + mtu = np->frag_size; + } + mtu -= hlen + sizeof(struct frag_hdr); if (skb_shinfo(skb)->frag_list) { int first_len = skb_pagelen(skb); @@ -555,18 +635,18 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) skb_shinfo(skb)->frag_list = NULL; /* BUILD HEADER */ - tmp_hdr = kmalloc(hlen, GFP_ATOMIC); + *prevhdr = NEXTHDR_FRAGMENT; + tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); if (!tmp_hdr) { - IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); return -ENOMEM; } - *prevhdr = NEXTHDR_FRAGMENT; - memcpy(tmp_hdr, skb->nh.raw, hlen); __skb_pull(skb, hlen); fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr)); - skb->nh.raw = __skb_push(skb, hlen); - memcpy(skb->nh.raw, tmp_hdr, hlen); + __skb_push(skb, hlen); + skb_reset_network_header(skb); + memcpy(skb_network_header(skb), tmp_hdr, hlen); ipv6_select_ident(skb, fh); fh->nexthdr = nexthdr; @@ -577,18 +657,22 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) first_len = skb_pagelen(skb); skb->data_len = first_len - skb_headlen(skb); skb->len = first_len; - skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr)); - + ipv6_hdr(skb)->payload_len = htons(first_len - + sizeof(struct ipv6hdr)); + + dst_hold(&rt->u.dst); for (;;) { /* Prepare header of the next frame, * before previous one went down. */ if (frag) { frag->ip_summed = CHECKSUM_NONE; - frag->h.raw = frag->data; + skb_reset_transport_header(frag); fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr)); - frag->nh.raw = __skb_push(frag, hlen); - memcpy(frag->nh.raw, tmp_hdr, hlen); + __skb_push(frag, hlen); + skb_reset_network_header(frag); + memcpy(skb_network_header(frag), tmp_hdr, + hlen); offset += skb->len - hlen - sizeof(struct frag_hdr); fh->nexthdr = nexthdr; fh->reserved = 0; @@ -596,11 +680,16 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (frag->next != NULL) fh->frag_off |= htons(IP6_MF); fh->identification = frag_id; - frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr)); + ipv6_hdr(frag)->payload_len = + htons(frag->len - + sizeof(struct ipv6hdr)); ip6_copy_metadata(frag, skb); } - + err = output(skb); + if(!err) + IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES); + if (err || !frag) break; @@ -609,11 +698,11 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) skb->next = NULL; } - if (tmp_hdr) - kfree(tmp_hdr); + kfree(tmp_hdr); if (err == 0) { - IP6_INC_STATS(IPSTATS_MIB_FRAGOKS); + IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS); + dst_release(&rt->u.dst); return 0; } @@ -623,7 +712,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) frag = skb; } - IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS); + dst_release(&rt->u.dst); return err; } @@ -655,8 +745,9 @@ slow_path: */ if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { - NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n")); - IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); + NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); + IP6_INC_STATS(ip6_dst_idev(skb->dst), + IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; goto fail; } @@ -668,9 +759,10 @@ slow_path: ip6_copy_metadata(frag, skb); skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev)); skb_put(frag, len + hlen + sizeof(struct frag_hdr)); - frag->nh.raw = frag->data; - fh = (struct frag_hdr*)(frag->data + hlen); - frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr); + skb_reset_network_header(frag); + fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); + frag->transport_header = (frag->network_header + hlen + + sizeof(struct frag_hdr)); /* * Charge the memory for the fragment to any owner @@ -682,14 +774,14 @@ slow_path: /* * Copy the packet header into the new buffer. */ - memcpy(frag->nh.raw, skb->data, hlen); + skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); /* * Build fragment header. */ fh->nexthdr = nexthdr; fh->reserved = 0; - if (frag_id) { + if (!frag_id) { ipv6_select_ident(skb, fh); frag_id = fh->identification; } else @@ -698,14 +790,15 @@ slow_path: /* * Copy a block of the IP datagram. */ - if (skb_copy_bits(skb, ptr, frag->h.raw, len)) + if (skb_copy_bits(skb, ptr, skb_transport_header(skb), len)) BUG(); left -= len; fh->frag_off = htons(offset); if (left > 0) fh->frag_off |= htons(IP6_MF); - frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr)); + ipv6_hdr(frag)->payload_len = htons(frag->len - + sizeof(struct ipv6hdr)); ptr += len; offset += len; @@ -713,64 +806,77 @@ slow_path: /* * Put this fragment into the sending queue. */ - - IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES); - err = output(frag); if (err) goto fail; + + IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES); } + IP6_INC_STATS(ip6_dst_idev(skb->dst), + IPSTATS_MIB_FRAGOKS); kfree_skb(skb); - IP6_INC_STATS(IPSTATS_MIB_FRAGOKS); return err; fail: - kfree_skb(skb); - IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(ip6_dst_idev(skb->dst), + IPSTATS_MIB_FRAGFAILS); + kfree_skb(skb); return err; } -int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +static inline int ip6_rt_check(struct rt6key *rt_key, + struct in6_addr *fl_addr, + struct in6_addr *addr_cache) { - int err = 0; + return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && + (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache))); +} - *dst = NULL; - if (sk) { - struct ipv6_pinfo *np = inet6_sk(sk); - - *dst = sk_dst_check(sk, np->dst_cookie); - if (*dst) { - struct rt6_info *rt = (struct rt6_info*)*dst; - - /* Yes, checking route validity in not connected - case is not very simple. Take into account, - that we do not support routing by source, TOS, - and MSG_DONTROUTE --ANK (980726) - - 1. If route was host route, check that - cached destination is current. - If it is network route, we still may - check its validity using saved pointer - to the last used address: daddr_cache. - We do not want to save whole address now, - (because main consumer of this service - is tcp, which has not this problem), - so that the last trick works only on connected - sockets. - 2. oif also should be the same. - */ - - if (((rt->rt6i_dst.plen != 128 || - !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr)) - && (np->daddr_cache == NULL || - !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache))) - || (fl->oif && fl->oif != (*dst)->dev->ifindex)) { - dst_release(*dst); - *dst = NULL; - } - } +static struct dst_entry *ip6_sk_dst_check(struct sock *sk, + struct dst_entry *dst, + struct flowi *fl) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct rt6_info *rt = (struct rt6_info *)dst; + + if (!dst) + goto out; + + /* Yes, checking route validity in not connected + * case is not very simple. Take into account, + * that we do not support routing by source, TOS, + * and MSG_DONTROUTE --ANK (980726) + * + * 1. ip6_rt_check(): If route was host route, + * check that cached destination is current. + * If it is network route, we still may + * check its validity using saved pointer + * to the last used address: daddr_cache. + * We do not want to save whole address now, + * (because main consumer of this service + * is tcp, which has not this problem), + * so that the last trick works only on connected + * sockets. + * 2. oif also should be the same. + */ + if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) || +#ifdef CONFIG_IPV6_SUBTREES + ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) || +#endif + (fl->oif && fl->oif != dst->dev->ifindex)) { + dst_release(dst); + dst = NULL; } +out: + return dst; +} + +static int ip6_dst_lookup_tail(struct sock *sk, + struct dst_entry **dst, struct flowi *fl) +{ + int err; + if (*dst == NULL) *dst = ip6_route_output(sk, fl); @@ -779,11 +885,45 @@ int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) if (ipv6_addr_any(&fl->fl6_src)) { err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src); - if (err) goto out_err_release; } +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + /* + * Here if the dst entry we've looked up + * has a neighbour entry that is in the INCOMPLETE + * state and the src address from the flow is + * marked as OPTIMISTIC, we release the found + * dst entry and replace it instead with the + * dst entry of the nexthop router + */ + if (!((*dst)->neighbour->nud_state & NUD_VALID)) { + struct inet6_ifaddr *ifp; + struct flowi fl_gw; + int redirect; + + ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1); + + redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); + if (ifp) + in6_ifa_put(ifp); + + if (redirect) { + /* + * We need to get the dst entry for the + * default router instead + */ + dst_release(*dst); + memcpy(&fl_gw, fl, sizeof(struct flowi)); + memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr)); + *dst = ip6_route_output(sk, &fl_gw); + if ((err = (*dst)->error)) + goto out_err_release; + } + } +#endif + return 0; out_err_release: @@ -792,10 +932,114 @@ out_err_release: return err; } -int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), - void *from, int length, int transhdrlen, - int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt, - unsigned int flags) +/** + * ip6_dst_lookup - perform route lookup on flow + * @sk: socket which provides route info + * @dst: pointer to dst_entry * for result + * @fl: flow to lookup + * + * This function performs a route lookup on the given flow. + * + * It returns zero on success, or a standard errno code on error. + */ +int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +{ + *dst = NULL; + return ip6_dst_lookup_tail(sk, dst, fl); +} +EXPORT_SYMBOL_GPL(ip6_dst_lookup); + +/** + * ip6_sk_dst_lookup - perform socket cached route lookup on flow + * @sk: socket which provides the dst cache and route info + * @dst: pointer to dst_entry * for result + * @fl: flow to lookup + * + * This function performs a route lookup on the given flow with the + * possibility of using the cached route in the socket if it is valid. + * It will take the socket dst lock when operating on the dst cache. + * As a result, this function can only be used in process context. + * + * It returns zero on success, or a standard errno code on error. + */ +int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +{ + *dst = NULL; + if (sk) { + *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); + *dst = ip6_sk_dst_check(sk, *dst, fl); + } + + return ip6_dst_lookup_tail(sk, dst, fl); +} +EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup); + +static inline int ip6_ufo_append_data(struct sock *sk, + int getfrag(void *from, char *to, int offset, int len, + int odd, struct sk_buff *skb), + void *from, int length, int hh_len, int fragheaderlen, + int transhdrlen, int mtu,unsigned int flags) + +{ + struct sk_buff *skb; + int err; + + /* There is support for UDP large send offload by network + * device, so create one single skb packet containing complete + * udp datagram + */ + if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { + skb = sock_alloc_send_skb(sk, + hh_len + fragheaderlen + transhdrlen + 20, + (flags & MSG_DONTWAIT), &err); + if (skb == NULL) + return -ENOMEM; + + /* reserve space for Hardware header */ + skb_reserve(skb, hh_len); + + /* create space for UDP/IP header */ + skb_put(skb,fragheaderlen + transhdrlen); + + /* initialize network header pointer */ + skb_reset_network_header(skb); + + /* initialize protocol header pointer */ + skb->transport_header = skb->network_header + fragheaderlen; + + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum = 0; + sk->sk_sndmsg_off = 0; + } + + err = skb_append_datato_frags(sk,skb, getfrag, from, + (length - transhdrlen)); + if (!err) { + struct frag_hdr fhdr; + + /* specify the length of each IP datagram fragment*/ + skb_shinfo(skb)->gso_size = mtu - fragheaderlen - + sizeof(struct frag_hdr); + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + ipv6_select_ident(skb, &fhdr); + skb_shinfo(skb)->ip6_frag_id = fhdr.identification; + __skb_queue_tail(&sk->sk_write_queue, skb); + + return 0; + } + /* There is not enough support do UPD LSO, + * so follow normal path + */ + kfree_skb(skb); + + return err; +} + +int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, + int offset, int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl, + struct rt6_info *rt, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -833,7 +1077,14 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse np->cork.rt = rt; inet->cork.fl = *fl; np->cork.hop_limit = hlimit; - inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path); + np->cork.tclass = tclass; + mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? + rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path); + if (np->frag_size < mtu) { + if (np->frag_size) + mtu = np->frag_size; + } + inet->cork.fragsize = mtu; if (dst_allfrag(rt->u.dst.path)) inet->cork.flags |= IPCORK_ALLFRAG; inet->cork.length = 0; @@ -854,7 +1105,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); - fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0); + fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len + (opt ? opt->opt_nflen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { @@ -871,16 +1122,26 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse * fragment alignment (= 8-15 octects, in total). * * Note that we may need to "move" the data from the tail of - * of the buffer to the new fragment when we split + * of the buffer to the new fragment when we split * the message. * - * FIXME: It may be fragmented into multiple chunks + * FIXME: It may be fragmented into multiple chunks * at once if non-fragmentable extension headers * are too large. - * --yoshfuji + * --yoshfuji */ inet->cork.length += length; + if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && + (rt->u.dst.dev->features & NETIF_F_UFO)) { + + err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len, + fragheaderlen, transhdrlen, mtu, + flags); + if (err) + goto error; + return 0; + } if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) goto alloc_new_skb; @@ -932,7 +1193,7 @@ alloc_new_skb: /* * We just reserve space for fragment header. - * Note: this may be overallocation if the message + * Note: this may be overallocation if the message * (without MSG_MORE) fits into the MTU. */ alloclen += sizeof(struct frag_hdr); @@ -965,10 +1226,10 @@ alloc_new_skb: * Find where to start putting bytes */ data = skb_put(skb, fraglen); - skb->nh.raw = data + exthdrlen; + skb_set_network_header(skb, exthdrlen); data += fragheaderlen; - skb->h.raw = data + exthdrlen; - + skb->transport_header = (skb->network_header + + fragheaderlen); if (fraggap) { skb->csum = skb_copy_and_csum_bits( skb_prev, maxfraglen, @@ -976,7 +1237,7 @@ alloc_new_skb: skb_prev->csum = csum_sub(skb_prev->csum, skb->csum); data += fraggap; - skb_trim(skb_prev, maxfraglen); + pskb_trim_unique(skb_prev, maxfraglen); } copy = datalen - transhdrlen - fraggap; if (copy < 0) { @@ -1068,7 +1329,7 @@ alloc_new_skb: return 0; error: inet->cork.length -= length; - IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); return err; } @@ -1091,10 +1352,10 @@ int ip6_push_pending_frames(struct sock *sk) tail_skb = &(skb_shinfo(skb)->frag_list); /* move skb->data to ip header from ext header */ - if (skb->data < skb->nh.raw) - __skb_pull(skb, skb->nh.raw - skb->data); + if (skb->data < skb_network_header(skb)) + __skb_pull(skb, skb_network_offset(skb)); while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { - __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw); + __skb_pull(tmp_skb, skb_network_header_len(skb)); *tail_skb = tmp_skb; tail_skb = &(tmp_skb->next); skb->len += tmp_skb->len; @@ -1106,15 +1367,18 @@ int ip6_push_pending_frames(struct sock *sk) } ipv6_addr_copy(final_dst, &fl->fl6_dst); - __skb_pull(skb, skb->h.raw - skb->nh.raw); + __skb_pull(skb, skb_network_header_len(skb)); if (opt && opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); if (opt && opt->opt_nflen) ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); - skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr)); - - *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000); + skb_push(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + hdr = ipv6_hdr(skb); + + *(__be32*)hdr = fl->fl6_flowlabel | + htonl(0x60000000 | ((int)np->cork.tclass << 20)); if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); @@ -1125,8 +1389,10 @@ int ip6_push_pending_frames(struct sock *sk) ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); ipv6_addr_copy(&hdr->daddr, final_dst); + skb->priority = sk->sk_priority; + skb->dst = dst_clone(&rt->u.dst); - IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); if (err) { if (err > 0) @@ -1137,10 +1403,8 @@ int ip6_push_pending_frames(struct sock *sk) out: inet->cork.flags &= ~IPCORK_OPT; - if (np->cork.opt) { - kfree(np->cork.opt); - np->cork.opt = NULL; - } + kfree(np->cork.opt); + np->cork.opt = NULL; if (np->cork.rt) { dst_release(&np->cork.rt->u.dst); np->cork.rt = NULL; @@ -1159,16 +1423,15 @@ void ip6_flush_pending_frames(struct sock *sk) struct sk_buff *skb; while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { - IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(ip6_dst_idev(skb->dst), + IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); } inet->cork.flags &= ~IPCORK_OPT; - if (np->cork.opt) { - kfree(np->cork.opt); - np->cork.opt = NULL; - } + kfree(np->cork.opt); + np->cork.opt = NULL; if (np->cork.rt) { dst_release(&np->cork.rt->u.dst); np->cork.rt = NULL;