/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>
static void	tcp_v6_send_reset(struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
static void	tcp_v6_send_check(struct sock *sk, int len,
				  struct sk_buff *skb);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static int	tcp_v6_xmit(struct sk_buff *skb, int ipfragok);

static struct inet_connection_sock_af_ops ipv6_mapped;
static struct inet_connection_sock_af_ops ipv6_specific;
int inet6_csk_bind_conflict(const struct sock *sk,
			    const struct inet_bind_bucket *tb)
{
	const struct sock *sk2;
	const struct hlist_node *node;

	/* We must walk the whole port owner list in this case. -DaveM */
	sk_for_each_bound(sk2, node, &tb->owners) {
		if (sk != sk2 &&
		    (!sk->sk_bound_dev_if ||
		     !sk2->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
		    (!sk->sk_reuse || !sk2->sk_reuse ||
		     sk2->sk_state == TCP_LISTEN) &&
		    ipv6_rcv_saddr_equal(sk, sk2))
			break;
	}

	return node != NULL;
}
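
/*
 * Reading of the checks above, roughly: two port owners conflict only
 * when they could both match the same incoming packets -- neither is
 * bound to a different device, at least one of them lacks SO_REUSEADDR
 * (or is a listener, where sk_reuse does not help), and their bound
 * source addresses overlap according to ipv6_rcv_saddr_equal().
 */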

static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
				 inet6_csk_bind_conflict);
}

static void tcp_v6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
			tcp_prot.hash(sk);
			return;
		}
		local_bh_disable();
		__inet6_hash(&tcp_hashinfo, sk);
		local_bh_enable();
	}
}

static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
				   struct in6_addr *saddr,
				   struct in6_addr *daddr,
				   unsigned long base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}
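
/*
 * tcp_v6_check() folds the IPv6 pseudo-header (source and destination
 * address, upper-layer length, next-header IPPROTO_TCP; RFC 2460 sec. 8.1)
 * into a checksum already accumulated in 'base'.  Typical use, as in
 * tcp_v6_send_synack() below:
 *
 *	th->check = tcp_v6_check(th, skb->len, &saddr, &daddr,
 *				 csum_partial((char *)th, skb->len,
 *					      skb->csum));
 */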

static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IPV6)) {
		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
						    skb->nh.ipv6h->saddr.s6_addr32,
						    skb->h.th->dest,
						    skb->h.th->source);
	} else {
		return secure_tcp_sequence_number(skb->nh.iph->daddr,
						  skb->nh.iph->saddr,
						  skb->h.th->dest,
						  skb->h.th->source);
	}
}

static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
				      struct inet_timewait_sock **twp)
{
	struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	const struct in6_addr *daddr = &np->rcv_saddr;
	const struct in6_addr *saddr = &np->daddr;
	const int dif = sk->sk_bound_dev_if;
	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
	unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport);
	struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
	struct sock *sk2;
	const struct hlist_node *node;
	struct inet_timewait_sock *tw;

	prefetch(head->chain.first);
	write_lock(&head->lock);

	/* Check TIME-WAIT sockets first. */
	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
		const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);

		tw = inet_twsk(sk2);

		if(*((__u32 *)&(tw->tw_dport))	== ports	&&
		   sk2->sk_family		== PF_INET6	&&
		   ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr)	&&
		   ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr)	&&
		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
			const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
			struct tcp_sock *tp = tcp_sk(sk);

			if (tcptw->tw_ts_recent_stamp &&
			    (twp == NULL ||
			     (sysctl_tcp_tw_reuse &&
			      xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
				/* See comment in tcp_ipv4.c */
				tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
				if (!tp->write_seq)
					tp->write_seq = 1;
				tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
				tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
				sock_hold(sk2);
				goto unique;
			} else
				goto not_unique;
		}
	}
	tw = NULL;

	/* And established part... */
	sk_for_each(sk2, node, &head->chain) {
		if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
			goto not_unique;
	}

unique:
	BUG_TRAP(sk_unhashed(sk));
	__sk_add_node(sk, &head->chain);
	sk->sk_hash = hash;
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(&head->lock);

	if (twp) {
		*twp = tw;
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		inet_twsk_deschedule(tw, &tcp_death_row);
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

		inet_twsk_put(tw);
	}
	return 0;

not_unique:
	write_unlock(&head->lock);
	return -EADDRNOTAVAIL;
}
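
/*
 * The TIME-WAIT branch above mirrors the IPv4 side: a TIME-WAIT bucket
 * may be recycled only if it recorded a recent timestamp and, on the
 * tcp_tw_reuse path, that stamp is over a second old.  The new socket
 * then seeds write_seq at tw_snd_nxt + 65535 + 2, keeping its sequence
 * space safely above anything the previous incarnation may have sent.
 */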

static inline u32 tcpv6_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);

	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
					   np->daddr.s6_addr32,
					   inet->dport);
}

static int tcp_v6_hash_connect(struct sock *sk)
{
	unsigned short snum = inet_sk(sk)->num;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int ret;

	if (!snum) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int range = high - low;
		int i;
		int port;
		static u32 hint;
		u32 offset = hint + tcpv6_port_offset(sk);
		struct hlist_node *node;
		struct inet_timewait_sock *tw = NULL;

		local_bh_disable();
		for (i = 1; i <= range; i++) {
			port = low + (i + offset) % range;
			head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			inet_bind_bucket_for_each(tb, node, &head->chain) {
				if (tb->port == port) {
					BUG_TRAP(!hlist_empty(&tb->owners));
					if (tb->fastreuse >= 0)
						goto next_port;
					if (!__tcp_v6_check_established(sk,
									port,
									&tw))
						goto ok;
					goto next_port;
				}
			}

			tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				break;
			}
			tb->fastreuse = -1;
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		}
		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		hint += i;

		/* Head lock still held and bh's disabled */
		inet_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->sport = htons(port);
			__inet6_hash(&tcp_hashinfo, sk);
		}
		spin_unlock(&head->lock);

		if (tw) {
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
		}

		ret = 0;
		goto out;
	}

	head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
	tb = inet_csk(sk)->icsk_bind_hash;
	spin_lock_bh(&head->lock);

	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		__inet6_hash(&tcp_hashinfo, sk);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = __tcp_v6_check_established(sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}
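
/*
 * Ephemeral port selection, in short: the search starts at a per-flow
 * keyed offset (tcpv6_port_offset() over {rcv_saddr, daddr, dport})
 * plus a rolling 'hint', walks the local port range at most once, and
 * takes the first port whose bind bucket is either fresh or holds only
 * connections that __tcp_v6_check_established() proves non-conflicting.
 */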

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p = NULL, final;
	struct flowi fl;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl, 0, sizeof(fl));

	if (np->sndflow) {
		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl.fl6_flowlabel);
		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */
	if(ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if(addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connecting to a link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl.fl6_flowlabel;

	/*
	 *	TCP over IPv4
	 */
	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = tp->ext_header_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		inet_csk(sk)->icsk_af_ops = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			tp->ext_header_len = exthdrlen;
			inet_csk(sk)->icsk_af_ops = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
			goto failure;
		} else {
			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
				      inet->saddr);
			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
				      inet->rcv_saddr);
		}

		return err;
	}

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src,
		       (saddr ? saddr : &np->saddr));
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_dport = usin->sin6_port;
	fl.fl_ip_sport = inet->sport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		goto failure;
	if (final_p)
		ipv6_addr_copy(&fl.fl6_dst, final_p);

	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
		goto failure;

	if (saddr == NULL) {
		saddr = &fl.fl6_src;
		ipv6_addr_copy(&np->rcv_saddr, saddr);
	}

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->rcv_saddr = LOOPBACK4_IPV6;

	ip6_dst_store(sk, dst, NULL);
	sk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	tp->ext_header_len = 0;
	if (np->opt)
		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = tcp_v6_hash_connect(sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq)
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->sport,
							     inet->dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	__sk_dst_reset(sk);
failure:
	inet->dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
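
/*
 * Note on the mapped case above: connecting an AF_INET6 socket to a
 * v4-mapped destination such as ::ffff:192.0.2.1 (illustrative address)
 * reroutes the socket through the IPv4 stack -- icsk_af_ops flips to
 * ipv6_mapped, backlog_rcv to tcp_v4_do_rcv, and tcp_v4_connect() does
 * the rest; on failure everything is flipped back to the IPv6 ops.
 */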

static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		int type, int code, int offset, __u32 info)
{
	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;

	sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
			  th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;
				goto out;
			}

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;
				goto out;
			}
		} else
			dst_hold(dst);

		if (tp->pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for a request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
					   &hdr->saddr, inet6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
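
/*
 * For ICMPV6_PKT_TOOBIG the handler above relies on the ICMP layer
 * having already lowered the cached route's MTU: when the socket's
 * pmtu_cookie now exceeds dst_mtu(), tcp_sync_mss() clamps the MSS and
 * tcp_simple_retransmit() immediately resends the segments that no
 * longer fit, rather than waiting for the retransmit timer.
 */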

static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
			      struct dst_entry *dst)
{
	struct inet6_request_sock *treq = inet6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff * skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr * final_p = NULL, final;
	struct flowi fl;
	int err = -1;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
	fl.fl6_flowlabel = 0;
	fl.oif = treq->iif;
	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
	fl.fl_ip_sport = inet_sk(sk)->sport;

	if (dst == NULL) {
		opt = np->opt;
		if (opt == NULL &&
		    np->rxopt.bits.osrcrt == 2 &&
		    treq->pktopts) {
			struct sk_buff *pktopts = treq->pktopts;
			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
			if (rxopt->srcrt)
				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
		}

		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err)
			goto done;
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);
		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto done;
	}

	skb = tcp_make_synack(sk, dst, req);
	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v6_check(th, skb->len,
					 &treq->loc_addr, &treq->rmt_addr,
					 csum_partial((char *)th, skb->len, skb->csum));

		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		err = ip6_xmit(sk, skb, &fl, opt, 0);
		if (err == NET_XMIT_CN)
			err = 0;
	}

done:
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	if (inet6_rsk(req)->pktopts)
		kfree_skb(inet6_rsk(req)->pktopts);
}

static struct request_sock_ops tcp6_request_sock_ops = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_v6_send_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset
};

static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet6_skb_parm *opt = IP6CB(skb);

	if (np->rxopt.all) {
		if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
		    ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) ||
		    (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) ||
		    ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
			return 1;
	}
	return 0;
}

static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcphdr *th = skb->h.th;

	if (skb->ip_summed == CHECKSUM_HW) {
		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
					    csum_partial((char *)th, th->doff<<2,
							 skb->csum));
	}
}
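
/*
 * Two cases above: with CHECKSUM_HW the device finishes the job, so
 * only the (inverted) pseudo-header sum is stored in th->check and
 * skb->csum records the offset at which the hardware must write the
 * final checksum; otherwise the header sum is folded in software with
 * the data checksum already accumulated in skb->csum.
 */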

static void tcp_v6_send_reset(struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;

	if (th->rst)
		return;

	if (!ipv6_unicast_destination(skb))
		return;

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */
	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));

	t1 = (struct tcphdr *) skb_push(buff, sizeof(struct tcphdr));

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = sizeof(*t1)/4;
	t1->rst = 1;

	if(th->ack) {
		t1->seq = th->ack_seq;
	} else {
		t1->ack = 1;
		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
				    + skb->len - (th->doff<<2));
	}

	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    sizeof(*t1), IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	/* sk = NULL, but it is safe for now. RST socket required. */
	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
			ip6_xmit(NULL, buff, &fl, NULL, 0);
			TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
			TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
			return;
		}
	}

	kfree_skb(buff);
}

static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;
	int tot_len = sizeof(struct tcphdr);

	if (ts)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff, tot_len);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len/4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = 1;
	t1->window = htons(win);

	if (ts) {
		u32 *ptr = (u32*)(t1 + 1);
		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*ptr++ = htonl(tcp_time_stamp);
		*ptr = htonl(ts);
	}

	buff->csum = csum_partial((char *)t1, tot_len, 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    tot_len, IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
			ip6_xmit(NULL, buff, &fl, NULL, 0);
			TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
			return;
		}
	}

	kfree_skb(buff);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent);

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
{
	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
}
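
/*
 * For a not-yet-accepted connection the ACK is synthesized from the
 * request sock itself: snt_isn + 1 and rcv_isn + 1 are exactly the
 * sequence and acknowledgment numbers a SYN_RECV socket would carry
 * after the SYN/SYN-ACK exchange.
 */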

static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
{
	struct request_sock *req, **prev;
	const struct tcphdr *th = skb->h.th;
	struct sock *nsk;

	/* Find possible connection requests. */
	req = inet6_csk_search_req(sk, &prev, th->source,
				   &skb->nh.ipv6h->saddr,
				   &skb->nh.ipv6h->daddr, inet6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
					 th->source, &skb->nh.ipv6h->daddr,
					 ntohs(th->dest), inet6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put((struct inet_timewait_sock *)nsk);
		return NULL;
	}

#if 0 /*def CONFIG_SYN_COOKIES*/
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct inet6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock *req = NULL;
	__u32 isn = TCP_SKB_CB(skb)->when;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	/*
	 *	There are no SYN attacks on IPv6, yet...
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		if (net_ratelimit())
			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
		goto drop;
	}

	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
	if (req == NULL)
		goto drop;

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = inet6_rsk(req);
	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
	TCP_ECN_create_request(req, skb->h.th);
	treq->pktopts = NULL;
	if (ipv6_opt_accepted(sk, skb) ||
	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
		atomic_inc(&skb->users);
		treq->pktopts = skb;
	}
	treq->iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
		treq->iif = inet6_iif(skb);

	if (isn == 0)
		isn = tcp_v6_init_sequence(sk,skb);

	tcp_rsk(req)->snt_isn = isn;

	if (tcp_v6_send_synack(sk, req, NULL))
		goto drop;

	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop:
	if (req)
		reqsk_free(req);

	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	return 0; /* don't send reset */
}

static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst)
{
	struct inet6_request_sock *treq = inet6_rsk(req);
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
	struct ipv6_txoptions *opt;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */
		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		if (newsk == NULL)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
			      newinet->daddr);

		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
			      newinet->saddr);

		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet6_iif(skb);
		newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is a tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, newtp->pmtu_cookie);

		return newsk;
	}

	opt = np->opt;

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (np->rxopt.bits.osrcrt == 2 &&
	    opt == NULL && treq->pktopts) {
		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
		if (rxopt->srcrt)
			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
	}

	if (dst == NULL) {
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}
		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
		fl.fl_ip_sport = inet_sk(sk)->sport;

		if (ip6_dst_lookup(sk, &dst, &fl))
			goto out;

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	ip6_dst_store(newsk, dst, NULL);
	newsk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
	newsk->sk_bound_dev_if = treq->iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->opt = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (treq->pktopts != NULL) {
		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
		kfree_skb(treq->pktopts);
		treq->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->opt	  = NULL;
	newnp->mcast_oif  = inet6_iif(skb);
	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

	/* Clone native IPv6 options from the listening socket (if any)

	   Yes, keeping a reference count would be much more clever,
	   but we do one more thing here: reattach the optmem
	   to the new socket.
	 */
	if (opt) {
		newnp->opt = ipv6_dup_options(newsk, opt);
		if (opt != np->opt)
			sock_kfree_s(sk, opt, opt->tot_len);
	}

	newtp->ext_header_len = 0;
	if (newnp->opt)
		newtp->ext_header_len = newnp->opt->opt_nflen +
					newnp->opt->opt_flen;

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

	__inet6_hash(&tcp_hashinfo, newsk);
	inet_inherit_port(&tcp_hashinfo, sk, newsk);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return NULL;
}

static int tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_HW) {
		if (!tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = ~tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr, 0);

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}
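
/*
 * As on the IPv4 receive path, segments of at most 76 bytes are
 * verified right away (deferring the checksum buys nothing for short
 * packets); longer packets keep the precomputed pseudo-header sum in
 * skb->csum and are completed later, e.g. during the copy to user space.
 */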

/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	if (sk_filter(sk, skb, 0))
		goto discard;

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
			goto reset;
		TCP_CHECK_TIMER(sk);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket..
		 */
		if(nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
		goto reset;
	TCP_CHECK_TIMER(sk);
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;

ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = inet6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
		if (ipv6_opt_accepted(sk, opt_skb)) {
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	if (opt_skb)
		kfree_skb(opt_skb);
	return 0;
}

static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
{
	struct sk_buff *skb = *pskb;
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	if (th->doff < sizeof(struct tcphdr)/4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v6_checksum_init(skb)))
		goto bad_packet;

	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
			    &skb->nh.ipv6h->daddr, ntohs(th->dest),
			    inet6_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb, 0))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v6_do_rcv(sk, skb);
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(skb);
	}

discard_it:
	/*
	 *	Discard frame
	 */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		goto discard_it;
	}

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		inet_twsk_put((struct inet_timewait_sock *)sk);
		goto discard_it;
	}

	switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
					   skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(&tcp_hashinfo,
					    &skb->nh.ipv6h->daddr,
					    ntohs(th->dest), inet6_iif(skb));
		if (sk2 != NULL) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
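
/*
 * TIME-WAIT handling above, in short: a SYN acceptable for a new
 * incarnation (TCP_TW_SYN) kills the old timewait bucket and re-runs
 * the demux against a listening socket; TCP_TW_ACK re-answers the peer
 * from the remembered sequence state; TCP_TW_RST falls back to the
 * no-socket path so that a reset is sent.
 */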

static int tcp_v6_rebuild_header(struct sock *sk)
{
	int err;
	struct dst_entry *dst;
	struct ipv6_pinfo *np = inet6_sk(sk);

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		struct inet_sock *inet = inet_sk(sk);
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
		fl.fl6_flowlabel = np->flow_label;
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet->dport;
		fl.fl_ip_sport = inet->sport;

		if (np->opt && np->opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err) {
			sk->sk_route_caps = 0;
			return err;
		}
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			sk->sk_err_soft = -err;
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
		sk->sk_route_caps = dst->dev->features &
			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	return 0;
}

static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
{
	struct sock *sk = skb->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct flowi fl;
	struct dst_entry *dst;
	struct in6_addr *final_p = NULL, final;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
	fl.fl6_flowlabel = np->flow_label;
	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_sport = inet->sport;
	fl.fl_ip_dport = inet->dport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		int err = ip6_dst_lookup(sk, &dst, &fl);

		if (err) {
			sk->sk_err_soft = -err;
			return err;
		}

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			sk->sk_route_caps = 0;
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
		sk->sk_route_caps = dst->dev->features &
			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	skb->dst = dst_clone(dst);

	/* Restore final destination back after routing done */
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);

	return ip6_xmit(sk, skb, &fl, np->opt, 0);
}

static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;

	sin6->sin6_family = AF_INET6;
	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
	sin6->sin6_port	= inet_sk(sk)->dport;
	/* We do not store received flowlabel for TCP */
	sin6->sin6_flowinfo = 0;
	sin6->sin6_scope_id = 0;
	if (sk->sk_bound_dev_if &&
	    ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
		sin6->sin6_scope_id = sk->sk_bound_dev_if;
}

static int tcp_v6_remember_stamp(struct sock *sk)
{
	/* Alas, not yet... */
	return 0;
}

static struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	=	tcp_v6_xmit,
	.send_check	=	tcp_v6_send_check,
	.rebuild_header	=	tcp_v6_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v6_remember_stamp,
	.net_header_len	=	sizeof(struct ipv6hdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};

/*
 *	TCP over IPv4 via INET6 API
 */
static struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	=	ip_queue_xmit,
	.send_check	=	tcp_v4_send_check,
	.rebuild_header	=	inet_sk_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v4_remember_stamp,
	.net_header_len	=	sizeof(struct iphdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;

	sk->sk_state = TCP_CLOSE;

	icsk->icsk_af_ops = &ipv6_specific;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}

static int tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	return inet6_destroy_sock(sk);
}

/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 struct sock *sk, struct request_sock *req, int i, int uid)
{
	int ttd = req->expires - jiffies;
	struct in6_addr *src = &inet6_rsk(req)->loc_addr;
	struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_sk(sk)->sport),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->rmt_port),
		   TCP_SYN_RECV,
		   0,0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->retrans,
		   uid,
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	struct inet_sock *inet = inet_sk(sp);
	struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	struct ipv6_pinfo *np = inet6_sk(sp);

	dest  = &np->daddr;
	src   = &np->rcv_saddr;
	destp = ntohs(inet->dport);
	srcp  = ntohs(inet->sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   sp->sk_state,
		   tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
		   timer_active,
		   jiffies_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   sock_i_uid(sp),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   atomic_read(&sp->sk_refcnt), sp,
		   icsk->icsk_rto,
		   icsk->icsk_ack.ato,
		   (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
		   tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
		   );
}

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest = &tcp6tw->tw_v6_daddr;
	src  = &tcp6tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);
}
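
/*
 * The three helpers above emit one line per socket in the legacy
 * /proc/net/tcp format, widened to 128-bit addresses: each address is
 * printed as four %08X words (s6_addr32[0..3], so byte order follows
 * host word order) followed by :port, then state, tx/rx queue sizes,
 * timer type and expiry, retransmit count, uid, timeout and inode.
 */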

#ifdef CONFIG_PROC_FS
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp6_sock(seq, v, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait6_sock(seq, v, st->num);
		break;
	}
out:
	return 0;
}

static struct file_operations tcp6_seq_fops;
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.owner		= THIS_MODULE,
	.name		= "tcp6",
	.family		= AF_INET6,
	.seq_show	= tcp6_seq_show,
	.seq_fops	= &tcp6_seq_fops,
};

int __init tcp6_proc_init(void)
{
	return tcp_proc_register(&tcp6_seq_afinfo);
}

void tcp6_proc_exit(void)
{
	tcp_proc_unregister(&tcp6_seq_afinfo);
}
#endif

struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.sendmsg		= tcp_sendmsg,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v6_do_rcv,
	.hash			= tcp_v6_hash,
	.unhash			= tcp_unhash,
	.get_port		= tcp_v6_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.twsk_obj_size		= sizeof(struct tcp6_timewait_sock),
	.rsk_prot		= &tcp6_request_sock_ops,
};

static struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.capability	=	-1,
	.no_check	=	0,
	.flags		=	INET_PROTOSW_PERMANENT,
};

void __init tcpv6_init(void)
{
	/* register inet6 protocol */
	if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
		printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
	inet6_register_protosw(&tcpv6_protosw);
}