[IPV6]: Introduce inet6_timewait_sock
[safe/jmp/linux-2.6] / net / ipv6 / tcp_ipv6.c
1 /*
2  *      TCP over IPv6
3  *      Linux INET6 implementation 
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
9  *
10  *      Based on: 
11  *      linux/net/ipv4/tcp.c
12  *      linux/net/ipv4/tcp_input.c
13  *      linux/net/ipv4/tcp_output.c
14  *
15  *      Fixes:
16  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
17  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
18  *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
19  *                                      a single port at the same time.
20  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
21  *
22  *      This program is free software; you can redistribute it and/or
23  *      modify it under the terms of the GNU General Public License
24  *      as published by the Free Software Foundation; either version
25  *      2 of the License, or (at your option) any later version.
26  */
27
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
36 #include <linux/in.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
43
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
47
48 #include <net/tcp.h>
49 #include <net/ndisc.h>
50 #include <net/inet6_hashtables.h>
51 #include <net/inet6_connection_sock.h>
52 #include <net/ipv6.h>
53 #include <net/transp_v6.h>
54 #include <net/addrconf.h>
55 #include <net/ip6_route.h>
56 #include <net/ip6_checksum.h>
57 #include <net/inet_ecn.h>
58 #include <net/protocol.h>
59 #include <net/xfrm.h>
60 #include <net/addrconf.h>
61 #include <net/snmp.h>
62 #include <net/dsfield.h>
63
64 #include <asm/uaccess.h>
65
66 #include <linux/proc_fs.h>
67 #include <linux/seq_file.h>
68
/* Forward declarations for routines referenced before their definitions
 * below in this file.
 */
static void	tcp_v6_send_reset(struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
static void	tcp_v6_send_check(struct sock *sk, int len,
				  struct sk_buff *skb);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

/* af_ops vectors: one for v4-mapped sockets, one for native IPv6
 * (definitions are outside this chunk).
 */
static struct inet_connection_sock_af_ops ipv6_mapped;
static struct inet_connection_sock_af_ops ipv6_specific;
78
/* Bind @sk to local port @snum (0 = pick an ephemeral port), using the
 * IPv6-aware bind-conflict check so v6 and v4-mapped sockets share the
 * bind table correctly.  Returns the inet_csk_get_port() result
 * (non-zero on failure).
 */
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
				 inet6_csk_bind_conflict);
}
84
85 static void tcp_v6_hash(struct sock *sk)
86 {
87         if (sk->sk_state != TCP_CLOSE) {
88                 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
89                         tcp_prot.hash(sk);
90                         return;
91                 }
92                 local_bh_disable();
93                 __inet6_hash(&tcp_hashinfo, sk);
94                 local_bh_enable();
95         }
96 }
97
/* Compute the TCP checksum over the IPv6 pseudo-header (@saddr, @daddr,
 * @len, IPPROTO_TCP) folded with the partial checksum @base.  The @th
 * argument is not used; it is kept for signature symmetry with the
 * IPv4 helper.
 */
static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
				   struct in6_addr *saddr,
				   struct in6_addr *daddr,
				   unsigned long base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}
105
106 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
107 {
108         if (skb->protocol == htons(ETH_P_IPV6)) {
109                 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
110                                                     skb->nh.ipv6h->saddr.s6_addr32,
111                                                     skb->h.th->dest,
112                                                     skb->h.th->source);
113         } else {
114                 return secure_tcp_sequence_number(skb->nh.iph->daddr,
115                                                   skb->nh.iph->saddr,
116                                                   skb->h.th->dest,
117                                                   skb->h.th->source);
118         }
119 }
120
/* Verify that the connecting socket's 4-tuple (local port @lport) is
 * unique in the established hash, recycling a matching TIME-WAIT entry
 * when the timestamp rules permit.
 *
 * Called with bottom halves disabled (see tcp_v6_hash_connect).  On
 * success the socket is inserted into the established chain and 0 is
 * returned; a recycled timewait socket is either handed back through
 * *@twp (caller descedules/puts it) or, when @twp is NULL, descheduled
 * here.  Returns -EADDRNOTAVAIL if the tuple is already taken.
 */
static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
				      struct inet_timewait_sock **twp)
{
	struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	const struct in6_addr *daddr = &np->rcv_saddr;
	const struct in6_addr *saddr = &np->daddr;
	const int dif = sk->sk_bound_dev_if;
	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
	unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport);
	struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
	struct sock *sk2;
	const struct hlist_node *node;
	struct inet_timewait_sock *tw;

	prefetch(head->chain.first);
	write_lock(&head->lock);

	/* Check TIME-WAIT sockets first.  Their chain lives ehash_size
	 * buckets past the established chain for this hash.
	 */
	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
		const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2);

		tw = inet_twsk(sk2);

		if(*((__u32 *)&(tw->tw_dport))	== ports	&&
		   sk2->sk_family		== PF_INET6	&&
		   ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)	&&
		   ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr)	&&
		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
			const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
			struct tcp_sock *tp = tcp_sk(sk);

			/* Reuse is allowed if the old connection left a
			 * recent timestamp and either the caller demands
			 * the port (twp == NULL) or tcp_tw_reuse is on
			 * and more than a second has passed.
			 */
			if (tcptw->tw_ts_recent_stamp &&
			    (!twp ||
			     (sysctl_tcp_tw_reuse &&
			      xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
				/* See comment in tcp_ipv4.c */
				tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
				if (!tp->write_seq)
					tp->write_seq = 1;
				tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
				tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
				sock_hold(sk2);
				goto unique;
			} else
				goto not_unique;
		}
	}
	tw = NULL;

	/* And established part... */
	sk_for_each(sk2, node, &head->chain) {
		if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
			goto not_unique;
	}

unique:
	/* No conflict: hash the socket while still holding the bucket lock. */
	BUG_TRAP(sk_unhashed(sk));
	__sk_add_node(sk, &head->chain);
	sk->sk_hash = hash;
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(&head->lock);

	if (twp) {
		*twp = tw;
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		inet_twsk_deschedule(tw, &tcp_death_row);
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

		inet_twsk_put(tw);
	}
	return 0;

not_unique:
	write_unlock(&head->lock);
	return -EADDRNOTAVAIL;
}
200
/* Per-flow starting offset into the ephemeral port range, computed by
 * secure_tcpv6_port_ephemeral() from the socket's addresses and
 * destination port so that port probing starts at an unpredictable but
 * flow-stable position.
 */
static inline u32 tcpv6_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);

	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
					   np->daddr.s6_addr32,
					   inet->dport);
}
210
/* Hash a connecting socket into the bind and established tables,
 * allocating an ephemeral local port first when the socket is unbound
 * (inet->num == 0).  Mirrors the IPv4 tcp_v4_hash_connect: ports are
 * probed from a keyed offset, and TIME-WAIT entries may be recycled
 * through __tcp_v6_check_established().  Returns 0 on success or
 * -EADDRNOTAVAIL when no usable port can be found.
 */
static int tcp_v6_hash_connect(struct sock *sk)
{
	unsigned short snum = inet_sk(sk)->num;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int ret;

	if (!snum) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int range = high - low;
		int i;
		int port;
		/* hint advances across connects so successive sockets
		 * probe different ports.
		 */
		static u32 hint;
		u32 offset = hint + tcpv6_port_offset(sk);
		struct hlist_node *node;
		struct inet_timewait_sock *tw = NULL;

		local_bh_disable();
		for (i = 1; i <= range; i++) {
			port = low + (i + offset) % range;
			head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			inet_bind_bucket_for_each(tb, node, &head->chain) {
				if (tb->port == port) {
					BUG_TRAP(!hlist_empty(&tb->owners));
					/* fastreuse >= 0: port owned by
					 * normally bound sockets - skip.
					 */
					if (tb->fastreuse >= 0)
						goto next_port;
					if (!__tcp_v6_check_established(sk,
									port,
									&tw))
						goto ok;
					goto next_port;
				}
			}

			tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				break;
			}
			tb->fastreuse = -1;
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		}
		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		hint += i;

		/* Head lock still held and bh's disabled */
		inet_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->sport = htons(port);
			__inet6_hash(&tcp_hashinfo, sk);
		}
		spin_unlock(&head->lock);

		if (tw) {
			/* A TIME-WAIT entry was recycled for this port;
			 * kill it now that we are hashed in its place.
			 */
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
		}

		ret = 0;
		goto out;
	}

	head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
	tb   = inet_csk(sk)->icsk_bind_hash;
	spin_lock_bh(&head->lock);

	/* Sole owner of an explicitly bound port: no conflict possible. */
	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		__inet6_hash(&tcp_hashinfo, sk);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		/* Drop only the spinlock; bh stays disabled until out:. */
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = __tcp_v6_check_established(sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}
304
/* Connect an IPv6 TCP socket to the address in @uaddr.  Handles flow
 * labels, link-local scope ids, and v4-mapped destinations (which are
 * delegated to tcp_v4_connect() after switching the af_ops vector).
 * Returns 0 on success or a negative errno.
 */
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p = NULL, final;
	struct flowi fl;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return(-EAFNOSUPPORT);

	memset(&fl, 0, sizeof(fl));

	if (np->sndflow) {
		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl.fl6_flowlabel);
		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
			/* A non-zero flow label must match a label the
			 * socket has joined; its destination overrides
			 * the one passed in.
			 */
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if(ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if(addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	/* Connecting to a different peer invalidates remembered
	 * timestamp state.
	 */
	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl.fl6_flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = tp->ext_header_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		/* Switch to the v4-mapped operation vectors before
		 * handing off to the IPv4 connect path.
		 */
		inet_csk(sk)->icsk_af_ops = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			/* Roll back to native IPv6 operations on failure. */
			tp->ext_header_len = exthdrlen;
			inet_csk(sk)->icsk_af_ops = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
			goto failure;
		} else {
			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
				      inet->saddr);
			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
				      inet->rcv_saddr);
		}

		return err;
	}

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src,
		       (saddr ? saddr : &np->saddr));
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_dport = usin->sin6_port;
	fl.fl_ip_sport = inet->sport;

	/* With a source routing header the route lookup targets the
	 * first hop; keep the real destination in @final for afterwards.
	 */
	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		goto failure;
	if (final_p)
		ipv6_addr_copy(&fl.fl6_dst, final_p);

	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
		goto failure;

	if (saddr == NULL) {
		saddr = &fl.fl6_src;
		ipv6_addr_copy(&np->rcv_saddr, saddr);
	}

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->rcv_saddr = LOOPBACK4_IPV6;

	ip6_dst_store(sk, dst, NULL);
	sk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	tp->ext_header_len = 0;
	if (np->opt)
		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = tcp_v6_hash_connect(sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq)
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->sport,
							     inet->dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	__sk_dst_reset(sk);
failure:
	inet->dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
489
/* ICMPv6 error handler for TCP.  Locates the socket the offending
 * segment (embedded in the ICMP payload at @offset) belongs to, then
 * reacts: path-MTU update for PKT_TOOBIG, dropping the embryonic
 * request_sock for listeners, aborting SYN_SENT/SYN_RECV connections,
 * or recording a soft/hard error on established sockets.
 */
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		int type, int code, int offset, __u32 info)
{
	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;

	sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
			  th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
		return;
	}

	/* TIME-WAIT sockets only need their reference dropped. */
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	/* Ignore errors referencing data outside the current send window. */
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;
				goto out;
			}

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;
				goto out;
			}

		} else
			dst_hold(dst);

		/* Shrink the cached MSS if the new path MTU is smaller
		 * and retransmit immediately at the new size.
		 */
		if (tp->pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for an request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
					   &hdr->saddr, inet6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, it SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
			sk->sk_err = err;
			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
630
631
/* Build and transmit a SYN+ACK for the pending connection @req.  When
 * @dst is NULL the route is resolved here, honouring a source routing
 * header taken from the socket's options or inverted from the received
 * SYN.  Returns 0 on success (NET_XMIT_CN counts as success), -1 if no
 * skb could be built, or a negative errno from route/xfrm lookup.
 */
static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
			      struct dst_entry *dst)
{
	struct inet6_request_sock *treq = inet6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff * skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr * final_p = NULL, final;
	struct flowi fl;
	int err = -1;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
	fl.fl6_flowlabel = 0;
	fl.oif = treq->iif;
	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
	fl.fl_ip_sport = inet_sk(sk)->sport;

	if (dst == NULL) {
		opt = np->opt;
		/* No socket options set but replies should follow the
		 * inverse of the SYN's routing header (osrcrt == 2).
		 */
		if (opt == NULL &&
		    np->rxopt.bits.osrcrt == 2 &&
		    treq->pktopts) {
			struct sk_buff *pktopts = treq->pktopts;
			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
			if (rxopt->srcrt)
				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
		}

		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err)
			goto done;
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);
		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto done;
	}

	skb = tcp_make_synack(sk, dst, req);
	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v6_check(th, skb->len,
					 &treq->loc_addr, &treq->rmt_addr,
					 csum_partial((char *)th, skb->len, skb->csum));

		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		err = ip6_xmit(sk, skb, &fl, opt, 0);
		if (err == NET_XMIT_CN)
			err = 0;
	}

done:
	/* Free an inverted-rthdr options block allocated above; never
	 * free the socket's own np->opt.
	 */
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	return err;
}
698
699 static void tcp_v6_reqsk_destructor(struct request_sock *req)
700 {
701         if (inet6_rsk(req)->pktopts)
702                 kfree_skb(inet6_rsk(req)->pktopts);
703 }
704
/* Operations vector for IPv6 TCP request (embryonic) sockets. */
static struct request_sock_ops tcp6_request_sock_ops = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_v6_send_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset
};
713
714 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
715 {
716         struct ipv6_pinfo *np = inet6_sk(sk);
717         struct inet6_skb_parm *opt = IP6CB(skb);
718
719         if (np->rxopt.all) {
720                 if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
721                     ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) ||
722                     (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) ||
723                     ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
724                         return 1;
725         }
726         return 0;
727 }
728
729
730 static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
731 {
732         struct ipv6_pinfo *np = inet6_sk(sk);
733         struct tcphdr *th = skb->h.th;
734
735         if (skb->ip_summed == CHECKSUM_HW) {
736                 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
737                 skb->csum = offsetof(struct tcphdr, check);
738         } else {
739                 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 
740                                             csum_partial((char *)th, th->doff<<2, 
741                                                          skb->csum));
742         }
743 }
744
745
/* Send a RST in reply to @skb.  Runs without socket context: the reply
 * is built entirely from the offending segment.  Per RFC 793 no RST is
 * sent in response to a RST, and non-unicast destinations are ignored.
 */
static void tcp_v6_send_reset(struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;

	if (th->rst)
		return;

	if (!ipv6_unicast_destination(skb))
		return;

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));

	t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = sizeof(*t1)/4;
	t1->rst = 1;

	/* Sequence selection per RFC 793: echo the peer's ACK as our
	 * sequence if present, otherwise ACK everything received.
	 */
	if(th->ack) {
		t1->seq = th->ack_seq;
	} else {
		t1->ack = 1;
		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
				    + skb->len - (th->doff<<2));
	}

	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    sizeof(*t1), IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	/* sk = NULL, but it is safe for now. RST socket required. */
	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {

		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
			ip6_xmit(NULL, buff, &fl, NULL, 0);
			TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
			TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
			return;
		}
	}

	/* Route or policy lookup failed: drop the reply. */
	kfree_skb(buff);
}
815
/* Send a bare ACK without any attached socket context (used for
 * TIME_WAIT sockets and pending request socks).  The reply skb is
 * built from scratch; addresses and ports are taken, swapped, from
 * the packet being answered.  @seq, @ack and @win are host byte
 * order; @ts, when non-zero, is the peer timestamp to echo in a TCP
 * timestamp option.
 */
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;
	int tot_len = sizeof(struct tcphdr);

	if (ts)
		tot_len += 3*4;	/* NOP + NOP + TIMESTAMP option: 12 bytes */

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff,tot_len);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len/4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = 1;
	t1->window = htons(win);
	
	if (ts) {
		/* Lay the timestamp option out directly after the header:
		 * two NOPs for alignment, kind/length, our stamp, the echo.
		 */
		u32 *ptr = (u32*)(t1 + 1);
		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*ptr++ = htonl(tcp_time_stamp);
		*ptr = htonl(ts);
	}

	buff->csum = csum_partial((char *)t1, tot_len, 0);

	memset(&fl, 0, sizeof(fl));
	/* Reply flows back to the sender: swap source and destination. */
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    tot_len, IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
			ip6_xmit(NULL, buff, &fl, NULL, 0);
			TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
			return;
		}
	}

	/* Route or xfrm policy lookup failed: give up on the reply. */
	kfree_skb(buff);
}
878
879 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
880 {
881         struct inet_timewait_sock *tw = inet_twsk(sk);
882         const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
883
884         tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
885                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
886                         tcptw->tw_ts_recent);
887
888         inet_twsk_put(tw);
889 }
890
891 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
892 {
893         tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
894 }
895
896
/* For a segment arriving on a LISTEN socket, find the connection it
 * really belongs to: first a pending request sock, then an already
 * established socket.  Returns the listener itself when neither
 * matches, a bh-locked child socket, or NULL when the segment should
 * be discarded (it matched a TIME_WAIT entry).
 */
static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
{
	struct request_sock *req, **prev;
	const struct tcphdr *th = skb->h.th;
	struct sock *nsk;

	/* Find possible connection requests. */
	req = inet6_csk_search_req(sk, &prev, th->source,
				   &skb->nh.ipv6h->saddr,
				   &skb->nh.ipv6h->daddr, inet6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
					 th->source, &skb->nh.ipv6h->daddr,
					 ntohs(th->dest), inet6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			/* Hand the established child back locked; the
			 * caller runs in BH context and will unlock.
			 */
			bh_lock_sock(nsk);
			return nsk;
		}
		/* TIME_WAIT hit: drop the lookup reference, caller
		 * discards the segment.
		 */
		inet_twsk_put((struct inet_timewait_sock *)nsk);
		return NULL;
	}

#if 0 /*def CONFIG_SYN_COOKIES*/
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}
929
930 /* FIXME: this is substantially similar to the ipv4 code.
931  * Can some kind of merge be done? -- erics
932  */
/* Handle an incoming SYN on a listening socket: allocate and fill a
 * request sock, send the SYN|ACK and queue the request until the
 * handshake completes.  Always returns 0 (never asks for a reset).
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct inet6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock *req = NULL;
	/* 0 for packets freshly received via tcp_v6_rcv (which zeroes
	 * ->when); a non-zero value supplied by a caller both skips
	 * the synflood drop and is reused as the ISN below.
	 */
	__u32 isn = TCP_SKB_CB(skb)->when;

	/* v4-mapped SYN on an AF_INET6 socket: defer to the IPv4 path. */
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop; 

	/*
	 *	There are no SYN attacks on IPv6, yet...	
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		if (net_ratelimit())
			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
		goto drop;		
	}

	/* Accept queue full and more than one young request pending:
	 * shed load instead of committing more state.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
	if (req == NULL)
		goto drop;

	/* Parse the SYN's TCP options; clamp MSS to what fits in the
	 * IPv6 minimum MTU after IPv6 + TCP headers.
	 */
	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = inet6_rsk(req);
	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
	TCP_ECN_create_request(req, skb->h.th);
	treq->pktopts = NULL;
	/* Keep a reference to the SYN skb when any rx option needs to
	 * be replayed to the child socket later.
	 */
	if (ipv6_opt_accepted(sk, skb) ||
	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
		atomic_inc(&skb->users);
		treq->pktopts = skb;
	}
	treq->iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
		treq->iif = inet6_iif(skb);

	if (isn == 0) 
		isn = tcp_v6_init_sequence(sk,skb);

	tcp_rsk(req)->snt_isn = isn;

	if (tcp_v6_send_synack(sk, req, NULL))
		goto drop;

	/* SYN|ACK is out: park the request until the final handshake
	 * ACK (or the retransmit timer) fires.
	 */
	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop:
	if (req)
		reqsk_free(req);

	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	return 0; /* don't send reset */
}
1009
/* Create the child socket once the three-way handshake completes.
 * Handles both real IPv6 connections and v4-mapped ones (the SYN
 * arrived as an ETH_P_IP frame on this v6 listener).  Returns the new
 * socket, or NULL on failure after bumping the LISTEN drop/overflow
 * counters.
 */
static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst)
{
	struct inet6_request_sock *treq = inet6_rsk(req);
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
	struct ipv6_txoptions *opt;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		if (newsk == NULL) 
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		/* Record the v4 endpoints as ::ffff:a.b.c.d mapped
		 * addresses in the v6 view of the socket.
		 */
		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
			      newinet->daddr);

		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
			      newinet->saddr);

		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

		/* From here on the child uses the v4 transmit paths. */
		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet6_iif(skb);
		newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, newtp->pmtu_cookie);

		return newsk;
	}

	opt = np->opt;

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	/* Invert a received source route when the listener asked for
	 * it and has no explicit tx options of its own.
	 */
	if (np->rxopt.bits.osrcrt == 2 &&
	    opt == NULL && treq->pktopts) {
		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
		if (rxopt->srcrt)
			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
	}

	if (dst == NULL) {
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		/* Route towards the peer; with a source route present
		 * the lookup goes to the first hop, the real final
		 * destination is restored afterwards.
		 */
		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}
		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
		fl.fl_ip_sport = inet_sk(sk)->sport;

		if (ip6_dst_lookup(sk, &dst, &fl))
			goto out;

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto out;
	} 

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	ip6_dst_store(newsk, dst, NULL);
	newsk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
	newsk->sk_bound_dev_if = treq->iif;

	/* Now IPv6 options... 

	   First: no IPv4 options.
	 */
	newinet->opt = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (treq->pktopts != NULL) {
		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
		kfree_skb(treq->pktopts);
		treq->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->opt	  = NULL;
	newnp->mcast_oif  = inet6_iif(skb);
	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we make one more one thing there: reattach optmem
	   to newsk.
	 */
	if (opt) {
		newnp->opt = ipv6_dup_options(newsk, opt);
		if (opt != np->opt)
			sock_kfree_s(sk, opt, opt->tot_len);
	}

	newtp->ext_header_len = 0;
	if (newnp->opt)
		newtp->ext_header_len = newnp->opt->opt_nflen +
					newnp->opt->opt_flen;

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

	__inet6_hash(&tcp_hashinfo, newsk);
	inet_inherit_port(&tcp_hashinfo, sk, newsk);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	/* Free an option block we inverted above but never attached. */
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return NULL;
}
1198
1199 static int tcp_v6_checksum_init(struct sk_buff *skb)
1200 {
1201         if (skb->ip_summed == CHECKSUM_HW) {
1202                 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1203                                   &skb->nh.ipv6h->daddr,skb->csum)) {
1204                         skb->ip_summed = CHECKSUM_UNNECESSARY;
1205                         return 0;
1206                 }
1207         }
1208
1209         skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1210                                   &skb->nh.ipv6h->daddr, 0);
1211
1212         if (skb->len <= 76) {
1213                 return __skb_checksum_complete(skb);
1214         }
1215         return 0;
1216 }
1217
1218 /* The socket must have it's spinlock held when we get
1219  * here.
1220  *
1221  * We have a potential double-lock case here, so even when
1222  * doing backlog processing we use the BH locking scheme.
1223  * This is because we cannot sleep with the original spinlock
1224  * held.
1225  */
/* Per-socket receive path: runs with the socket spinlock held, either
 * directly from tcp_v6_rcv() or from backlog processing.  Returns 0,
 * or non-zero when the caller must emit a reset (via tcp_v6_rcv's
 * ret handling).
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	if (sk_filter(sk, skb, 0))
		goto discard;

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
			goto reset;
		TCP_CHECK_TIMER(sk);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	/* Slow path: sanity-check length and checksum before the
	 * state machine sees the segment.
	 */
	if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) { 
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket..
		 */
		if(nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
		goto reset;
	TCP_CHECK_TIMER(sk);
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = inet6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
		if (ipv6_opt_accepted(sk, opt_skb)) {
			/* Latch this skb as the most recent pktoptions;
			 * the previous holder falls out of xchg and is
			 * freed below.
			 */
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	if (opt_skb)
		kfree_skb(opt_skb);
	return 0;
}
1347
/* Protocol-handler entry point for IPv6 TCP, called in softirq
 * context.  Validates header and checksum, looks up the owning
 * socket, and dispatches: normal/prequeue/backlog processing for live
 * sockets, a RST for unknown connections, and the TIME_WAIT state
 * machine for dying ones.
 */
static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
{
	struct sk_buff *skb = *pskb;
	struct tcphdr *th;	
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	/* Data offset must at least cover the fixed header, and the
	 * full header (with options) must be linearly accessible.
	 */
	if (th->doff < sizeof(struct tcphdr)/4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v6_checksum_init(skb)))
		goto bad_packet;

	/* Re-read th: pskb_may_pull may have reallocated the header. */
	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
			    &skb->nh.ipv6h->daddr, ntohs(th->dest),
			    inet6_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb, 0))
		goto discard_and_relse;

	skb->dev = NULL;

	/* Process now if the socket is free, otherwise queue on the
	 * backlog for the owning task to drain on release.
	 */
	bh_lock_sock(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v6_do_rcv(sk, skb);
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		/* Valid segment, no matching connection: reset it. */
		tcp_v6_send_reset(skb);
	}

discard_it:

	/*
	 *	Discard frame
	 */

	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		goto discard_it;
	}

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		inet_twsk_put((struct inet_timewait_sock *)sk);
		goto discard_it;
	}

	switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
					   skb, th)) {
	case TCP_TW_SYN:
	{
		/* Acceptable new SYN: if a listener exists, retire the
		 * TIME_WAIT entry and restart processing on it.
		 */
		struct sock *sk2;

		sk2 = inet6_lookup_listener(&tcp_hashinfo,
					    &skb->nh.ipv6h->daddr,
					    ntohs(th->dest), inet6_iif(skb));
		if (sk2 != NULL) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
1480
/* Stub for the af_ops remember_stamp hook: IPv6 has no peer timestamp
 * cache yet, so nothing is recorded and 0 is returned.
 */
static int tcp_v6_remember_stamp(struct sock *sk)
{
	/* Alas, not yet... */
	return 0;
}
1486
/* Address-family operations installed on TCP sockets carrying real
 * IPv6 traffic (see tcp_v6_init_sock).
 */
static struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	=	inet6_csk_xmit,
	.send_check	=	tcp_v6_send_check,
	.rebuild_header	=	inet6_sk_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v6_remember_stamp,
	.net_header_len	=	sizeof(struct ipv6hdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	inet6_csk_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};
1501
1502 /*
1503  *      TCP over IPv4 via INET6 API
1504  */
1505
/* Installed when an AF_INET6 socket carries a v4-mapped connection
 * (see tcp_v6_syn_recv_sock): IPv4 transmit/checksum/timestamp paths,
 * IPv6 sockopt and sockaddr handling.
 */
static struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	=	ip_queue_xmit,
	.send_check	=	tcp_v4_send_check,
	.rebuild_header	=	inet_sk_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v4_remember_stamp,
	.net_header_len	=	sizeof(struct iphdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	inet6_csk_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};
1520
1521
1522
1523 /* NOTE: A lot of things set to zero explicitly by call to
1524  *       sk_alloc() so need not be done here.
1525  */
/* Initialize a newly created TCPv6 socket: queues, timers, congestion
 * defaults, buffer sizes and the IPv6 af_ops.  Returns 0 (cannot
 * fail).
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;	/* conservative initial MSS until the path is probed */

	tp->reordering = sysctl_tcp_reordering;

	sk->sk_state = TCP_CLOSE;

	icsk->icsk_af_ops = &ipv6_specific;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}
1568
/* Tear down a TCPv6 socket: run the shared IPv4/TCP destructor first,
 * then the IPv6-level cleanup, whose result is returned.
 */
static int tcp_v6_destroy_sock(struct sock *sk)
{
	int err;

	tcp_v4_destroy_sock(sk);
	err = inet6_destroy_sock(sk);
	return err;
}
1574
1575 /* Proc filesystem TCPv6 sock list dumping. */
/* Emit one /proc/net/tcp6 line for a pending request sock on listener
 * @sk: mapped v6 endpoints, SYN_RECV state, remaining expire time and
 * retransmit count.
 */
static void get_openreq6(struct seq_file *seq, 
			 struct sock *sk, struct request_sock *req, int i, int uid)
{
	int ttd = req->expires - jiffies;	/* ticks until the request expires */
	struct in6_addr *src = &inet6_rsk(req)->loc_addr;
	struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_sk(sk)->sport),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->rmt_port),
		   TCP_SYN_RECV,
		   0,0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */  
		   jiffies_to_clock_t(ttd), 
		   req->retrans,
		   uid,
		   0,  /* non standard timer */  
		   0, /* open_requests have no inode */
		   0, req);
}
1606
/* Emit one /proc/net/tcp6 line for a full socket @sp: endpoints,
 * state, queue depths, the most urgent pending timer, and assorted
 * connection statistics.
 */
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;		/* 1=retransmit, 2=keepalive, 4=probe0, 0=none */
	unsigned long timer_expires;
	struct inet_sock *inet = inet_sk(sp);
	struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	struct ipv6_pinfo *np = inet6_sk(sp);

	dest  = &np->daddr;
	src   = &np->rcv_saddr;
	destp = ntohs(inet->dport);
	srcp  = ntohs(inet->sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;	/* prints as zero remaining time */
	}

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   sp->sk_state, 
		   tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
		   timer_active,
		   jiffies_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   sock_i_uid(sp),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   atomic_read(&sp->sk_refcnt), sp,
		   icsk->icsk_rto,
		   icsk->icsk_ack.ato,
		   (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
		   tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
		   );
}
1660
1661 static void get_timewait6_sock(struct seq_file *seq, 
1662                                struct inet_timewait_sock *tw, int i)
1663 {
1664         struct in6_addr *dest, *src;
1665         __u16 destp, srcp;
1666         struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
1667         int ttd = tw->tw_ttd - jiffies;
1668
1669         if (ttd < 0)
1670                 ttd = 0;
1671
1672         dest = &tw6->tw_v6_daddr;
1673         src  = &tw6->tw_v6_rcv_saddr;
1674         destp = ntohs(tw->tw_dport);
1675         srcp  = ntohs(tw->tw_sport);
1676
1677         seq_printf(seq,
1678                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1679                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1680                    i,
1681                    src->s6_addr32[0], src->s6_addr32[1],
1682                    src->s6_addr32[2], src->s6_addr32[3], srcp,
1683                    dest->s6_addr32[0], dest->s6_addr32[1],
1684                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
1685                    tw->tw_substate, 0, 0,
1686                    3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
1687                    atomic_read(&tw->tw_refcnt), tw);
1688 }
1689
1690 #ifdef CONFIG_PROC_FS
1691 static int tcp6_seq_show(struct seq_file *seq, void *v)
1692 {
1693         struct tcp_iter_state *st;
1694
1695         if (v == SEQ_START_TOKEN) {
1696                 seq_puts(seq,
1697                          "  sl  "
1698                          "local_address                         "
1699                          "remote_address                        "
1700                          "st tx_queue rx_queue tr tm->when retrnsmt"
1701                          "   uid  timeout inode\n");
1702                 goto out;
1703         }
1704         st = seq->private;
1705
1706         switch (st->state) {
1707         case TCP_SEQ_STATE_LISTENING:
1708         case TCP_SEQ_STATE_ESTABLISHED:
1709                 get_tcp6_sock(seq, v, st->num);
1710                 break;
1711         case TCP_SEQ_STATE_OPENREQ:
1712                 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
1713                 break;
1714         case TCP_SEQ_STATE_TIME_WAIT:
1715                 get_timewait6_sock(seq, v, st->num);
1716                 break;
1717         }
1718 out:
1719         return 0;
1720 }
1721
/* Filled in by tcp_proc_register(); forward-declared so the afinfo
 * table below can point at it. */
static struct file_operations tcp6_seq_fops;
/* AF_INET6 descriptor handed to the shared TCP /proc machinery;
 * creates the /proc/net/tcp6 entry backed by tcp6_seq_show(). */
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.owner		= THIS_MODULE,
	.name		= "tcp6",
	.family		= AF_INET6,
	.seq_show	= tcp6_seq_show,
	.seq_fops	= &tcp6_seq_fops,
};
1730
/*
 * Create /proc/net/tcp6 via the shared TCP proc registration helper.
 * Returns 0 on success or the negative error from tcp_proc_register().
 */
int __init tcp6_proc_init(void)
{
	return tcp_proc_register(&tcp6_seq_afinfo);
}
1735
/* Remove /proc/net/tcp6; counterpart of tcp6_proc_init(). */
void tcp6_proc_exit(void)
{
	tcp_proc_unregister(&tcp6_seq_afinfo);
}
1740 #endif
1741
/*
 * proto operations for AF_INET6 SOCK_STREAM sockets.  The generic TCP
 * entry points (close/ioctl/sendmsg/...) are shared with IPv4; only
 * the address-family specific hooks (connect, backlog_rcv, hash,
 * get_port, init, destroy) use the v6 variants defined in this file.
 */
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.sendmsg		= tcp_sendmsg,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v6_do_rcv,
	.hash			= tcp_v6_hash,
	.unhash			= tcp_unhash,
	.get_port		= tcp_v6_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	/* timewait minisocks carry IPv6 addresses (inet6_timewait_sock),
	 * hence the v6-specific slab object size here. */
	.twsk_obj_size		= sizeof(struct tcp6_timewait_sock),
	.rsk_prot		= &tcp6_request_sock_ops,
};
1774
/* IPv6 transport-protocol hooks for IPPROTO_TCP: packet input and
 * ICMPv6 error handling.  Registered from tcpv6_init(). */
static struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
1780
/* Socket-layer switch entry mapping socket(AF_INET6, SOCK_STREAM,
 * IPPROTO_TCP) onto tcpv6_prot.  capability -1 means no special
 * capability is required to create such a socket; the entry is
 * PERMANENT (cannot be overridden or removed). */
static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.capability	=	-1,
	.no_check	=	0,
	.flags		=	INET_PROTOSW_PERMANENT,
};
1790
1791 void __init tcpv6_init(void)
1792 {
1793         /* register inet6 protocol */
1794         if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
1795                 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
1796         inet6_register_protosw(&tcpv6_protosw);
1797 }