[TWSK]: Introduce struct timewait_sock_ops
[safe/jmp/linux-2.6] / net / ipv6 / tcp_ipv6.c
1 /*
2  *      TCP over IPv6
3  *      Linux INET6 implementation 
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
9  *
10  *      Based on: 
11  *      linux/net/ipv4/tcp.c
12  *      linux/net/ipv4/tcp_input.c
13  *      linux/net/ipv4/tcp_output.c
14  *
15  *      Fixes:
16  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
17  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
18  *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
19  *                                      a single port at the same time.
20  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
21  *
22  *      This program is free software; you can redistribute it and/or
23  *      modify it under the terms of the GNU General Public License
24  *      as published by the Free Software Foundation; either version
25  *      2 of the License, or (at your option) any later version.
26  */
27
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
36 #include <linux/in.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
43
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
47
48 #include <net/tcp.h>
49 #include <net/ndisc.h>
50 #include <net/inet6_hashtables.h>
51 #include <net/inet6_connection_sock.h>
52 #include <net/ipv6.h>
53 #include <net/transp_v6.h>
54 #include <net/addrconf.h>
55 #include <net/ip6_route.h>
56 #include <net/ip6_checksum.h>
57 #include <net/inet_ecn.h>
58 #include <net/protocol.h>
59 #include <net/xfrm.h>
60 #include <net/addrconf.h>
61 #include <net/snmp.h>
62 #include <net/dsfield.h>
63 #include <net/timewait_sock.h>
64
65 #include <asm/uaccess.h>
66
67 #include <linux/proc_fs.h>
68 #include <linux/seq_file.h>
69
70 static void     tcp_v6_send_reset(struct sk_buff *skb);
71 static void     tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
72 static void     tcp_v6_send_check(struct sock *sk, int len, 
73                                   struct sk_buff *skb);
74
75 static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static struct inet_connection_sock_af_ops ipv6_mapped;
78 static struct inet_connection_sock_af_ops ipv6_specific;
79
/*
 * Bind-time local port allocation for an IPv6 TCP socket.
 * Delegates to the generic inet connection-sock allocator, supplying the
 * IPv6-aware bind-conflict callback so v4/v6 port sharing rules
 * (including IPV6_V6ONLY) are honoured.
 */
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
        return inet_csk_get_port(&tcp_hashinfo, sk, snum,
                                 inet6_csk_bind_conflict);
}
85
86 static void tcp_v6_hash(struct sock *sk)
87 {
88         if (sk->sk_state != TCP_CLOSE) {
89                 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
90                         tcp_prot.hash(sk);
91                         return;
92                 }
93                 local_bh_disable();
94                 __inet6_hash(&tcp_hashinfo, sk);
95                 local_bh_enable();
96         }
97 }
98
/*
 * Compute the TCP checksum over the IPv6 pseudo-header.
 * @base is the partial checksum already accumulated over the TCP
 * header and payload; the result is the final value for th->check.
 */
static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
                                   struct in6_addr *saddr, 
                                   struct in6_addr *daddr, 
                                   unsigned long base)
{
        return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}
106
107 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
108 {
109         if (skb->protocol == htons(ETH_P_IPV6)) {
110                 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
111                                                     skb->nh.ipv6h->saddr.s6_addr32,
112                                                     skb->h.th->dest,
113                                                     skb->h.th->source);
114         } else {
115                 return secure_tcp_sequence_number(skb->nh.iph->daddr,
116                                                   skb->nh.iph->saddr,
117                                                   skb->h.th->dest,
118                                                   skb->h.th->source);
119         }
120 }
121
/*
 * Check that the connection 4-tuple (local addr/port, remote addr/port)
 * is unique in the established hash, and if so insert the socket.
 *
 * TIME-WAIT entries matching the tuple may be recycled: if @twp is
 * non-NULL the caller takes ownership of the found timewait socket
 * (returned in *twp) and must deschedule/release it; if @twp is NULL
 * the timewait socket is descheduled here.
 *
 * Returns 0 on success (socket hashed), -EADDRNOTAVAIL if the tuple
 * is already in use. Called with local bottom halves disabled.
 */
static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
                                      struct inet_timewait_sock **twp)
{
        struct inet_sock *inet = inet_sk(sk);
        const struct ipv6_pinfo *np = inet6_sk(sk);
        /* Note the swap: our local address is the peer's destination. */
        const struct in6_addr *daddr = &np->rcv_saddr;
        const struct in6_addr *saddr = &np->daddr;
        const int dif = sk->sk_bound_dev_if;
        const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
        unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport);
        struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
        struct sock *sk2;
        const struct hlist_node *node;
        struct inet_timewait_sock *tw;

        prefetch(head->chain.first);
        write_lock(&head->lock);

        /* Check TIME-WAIT sockets first. */
        /* TIME-WAIT chains live in the second half of the ehash table,
         * ehash_size buckets past the established chain. */
        sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
                const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2);

                tw = inet_twsk(sk2);

                if(*((__u32 *)&(tw->tw_dport))  == ports        &&
                   sk2->sk_family               == PF_INET6     &&
                   ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)    &&
                   ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr)        &&
                   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
                        /* twsk_unique() decides whether the old timewait
                         * connection may be safely recycled. */
                        if (twsk_unique(sk, sk2, twp))
                                goto unique;
                        else
                                goto not_unique;
                }
        }
        tw = NULL;

        /* And established part... */
        sk_for_each(sk2, node, &head->chain) {
                if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
                        goto not_unique;
        }

unique:
        /* Tuple is free: hash the socket while still holding the lock. */
        BUG_TRAP(sk_unhashed(sk));
        __sk_add_node(sk, &head->chain);
        sk->sk_hash = hash;
        sock_prot_inc_use(sk->sk_prot);
        write_unlock(&head->lock);

        if (twp) {
                /* Hand the recycled timewait socket to the caller. */
                *twp = tw;
                NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
        } else if (tw) {
                /* Silly. Should hash-dance instead... */
                inet_twsk_deschedule(tw, &tcp_death_row);
                NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

                inet_twsk_put(tw);
        }
        return 0;

not_unique:
        write_unlock(&head->lock);
        return -EADDRNOTAVAIL;
}
188
/*
 * Per-connection ephemeral-port search offset, derived from the
 * (local addr, remote addr, remote port) triple so different peers
 * start their port scan at different positions.
 */
static inline u32 tcpv6_port_offset(const struct sock *sk)
{
        const struct inet_sock *inet = inet_sk(sk);
        const struct ipv6_pinfo *np = inet6_sk(sk);

        return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
                                           np->daddr.s6_addr32,
                                           inet->dport);
}
198
/*
 * Hash a connecting socket, allocating an ephemeral local port first
 * if the socket is not yet bound (snum == 0).
 *
 * Ephemeral case: scan the sysctl-configured port range starting at a
 * per-destination secure offset, looking for a bind bucket that is
 * either free or only reusable via TIME-WAIT recycling
 * (__tcp_v6_check_established). Bound case: if this socket is the sole
 * owner of its bind bucket the tuple is trivially unique; otherwise
 * fall through to the established-hash uniqueness check.
 *
 * Returns 0 on success or -EADDRNOTAVAIL when no port/tuple is free.
 */
static int tcp_v6_hash_connect(struct sock *sk)
{
        unsigned short snum = inet_sk(sk)->num;
        struct inet_bind_hashbucket *head;
        struct inet_bind_bucket *tb;
        int ret;

        if (!snum) {
                int low = sysctl_local_port_range[0];
                int high = sysctl_local_port_range[1];
                int range = high - low;
                int i;
                int port;
                /* Rotates across calls so successive connects don't
                 * hammer the same port. Unsynchronized by design. */
                static u32 hint;
                u32 offset = hint + tcpv6_port_offset(sk);
                struct hlist_node *node;
                struct inet_timewait_sock *tw = NULL;

                local_bh_disable();
                for (i = 1; i <= range; i++) {
                        port = low + (i + offset) % range;
                        head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
                        spin_lock(&head->lock);

                        /* Does not bother with rcv_saddr checks,
                         * because the established check is already
                         * unique enough.
                         */
                        inet_bind_bucket_for_each(tb, node, &head->chain) {
                                if (tb->port == port) {
                                        BUG_TRAP(!hlist_empty(&tb->owners));
                                        /* fastreuse >= 0 means normal
                                         * bind() sockets own this port;
                                         * we cannot steal it. */
                                        if (tb->fastreuse >= 0)
                                                goto next_port;
                                        if (!__tcp_v6_check_established(sk,
                                                                        port,
                                                                        &tw))
                                                goto ok;
                                        goto next_port;
                                }
                        }

                        /* Port not in use at all: create its bucket. */
                        tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
                        if (!tb) {
                                spin_unlock(&head->lock);
                                break;
                        }
                        tb->fastreuse = -1;
                        goto ok;

                next_port:
                        spin_unlock(&head->lock);
                }
                local_bh_enable();

                return -EADDRNOTAVAIL;

ok:
                hint += i;

                /* Head lock still held and bh's disabled */
                inet_bind_hash(sk, tb, port);
                if (sk_unhashed(sk)) {
                        inet_sk(sk)->sport = htons(port);
                        __inet6_hash(&tcp_hashinfo, sk);
                }
                spin_unlock(&head->lock);

                if (tw) {
                        /* Kill the recycled TIME-WAIT entry we displaced. */
                        inet_twsk_deschedule(tw, &tcp_death_row);
                        inet_twsk_put(tw);
                }

                ret = 0;
                goto out;
        }

        head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
        tb   = inet_csk(sk)->icsk_bind_hash;
        spin_lock_bh(&head->lock);

        if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
                /* Sole owner of the bind bucket: tuple must be unique. */
                __inet6_hash(&tcp_hashinfo, sk);
                spin_unlock_bh(&head->lock);
                return 0;
        } else {
                /* Drop only the spinlock; bh stays disabled until the
                 * local_bh_enable() at out:, pairing with spin_lock_bh. */
                spin_unlock(&head->lock);
                /* No definite answer... Walk to established hash table */
                ret = __tcp_v6_check_established(sk, snum, NULL);
out:
                local_bh_enable();
                return ret;
        }
}
292
/*
 * Active open (connect()) for an IPv6 TCP socket.
 *
 * Validates the sockaddr, resolves any flow label, handles the
 * v4-mapped-address case by re-pointing the socket at the IPv4 ops and
 * calling tcp_v4_connect(), otherwise builds a flow, looks up the
 * route (and xfrm policy), fills in source addresses, allocates a
 * local port via tcp_v6_hash_connect(), and sends the SYN.
 *
 * Returns 0 on success or a negative errno; on failure dport and
 * sk_route_caps are reset.
 */
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 
                          int addr_len)
{
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct in6_addr *saddr = NULL, *final_p = NULL, final;
        struct flowi fl;
        struct dst_entry *dst;
        int addr_type;
        int err;

        if (addr_len < SIN6_LEN_RFC2133) 
                return -EINVAL;

        if (usin->sin6_family != AF_INET6) 
                return(-EAFNOSUPPORT);

        memset(&fl, 0, sizeof(fl));

        /* If the app sends flow info, honour a previously leased flow
         * label: it pins the destination address. */
        if (np->sndflow) {
                fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
                IP6_ECN_flow_init(fl.fl6_flowlabel);
                if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
                        struct ip6_flowlabel *flowlabel;
                        flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
                        if (flowlabel == NULL)
                                return -EINVAL;
                        ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
                        fl6_sock_release(flowlabel);
                }
        }

        /*
         *      connect() to INADDR_ANY means loopback (BSD'ism).
         */
        
        if(ipv6_addr_any(&usin->sin6_addr))
                usin->sin6_addr.s6_addr[15] = 0x1; 

        addr_type = ipv6_addr_type(&usin->sin6_addr);

        if(addr_type & IPV6_ADDR_MULTICAST)
                return -ENETUNREACH;

        if (addr_type&IPV6_ADDR_LINKLOCAL) {
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    usin->sin6_scope_id) {
                        /* If interface is set while binding, indices
                         * must coincide.
                         */
                        if (sk->sk_bound_dev_if &&
                            sk->sk_bound_dev_if != usin->sin6_scope_id)
                                return -EINVAL;

                        sk->sk_bound_dev_if = usin->sin6_scope_id;
                }

                /* Connect to link-local address requires an interface */
                if (!sk->sk_bound_dev_if)
                        return -EINVAL;
        }

        /* New destination: stale timestamps must not poison PAWS. */
        if (tp->rx_opt.ts_recent_stamp &&
            !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
                tp->rx_opt.ts_recent = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                tp->write_seq = 0;
        }

        ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
        np->flow_label = fl.fl6_flowlabel;

        /*
         *      TCP over IPv4
         */

        if (addr_type == IPV6_ADDR_MAPPED) {
                u32 exthdrlen = tp->ext_header_len;
                struct sockaddr_in sin;

                SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

                if (__ipv6_only_sock(sk))
                        return -ENETUNREACH;

                sin.sin_family = AF_INET;
                sin.sin_port = usin->sin6_port;
                sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

                /* Flip the socket onto the IPv4 receive/ops path. */
                inet_csk(sk)->icsk_af_ops = &ipv6_mapped;
                sk->sk_backlog_rcv = tcp_v4_do_rcv;

                err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

                if (err) {
                        /* Undo the op switch so the socket stays usable
                         * as a native IPv6 socket. */
                        tp->ext_header_len = exthdrlen;
                        inet_csk(sk)->icsk_af_ops = &ipv6_specific;
                        sk->sk_backlog_rcv = tcp_v6_do_rcv;
                        goto failure;
                } else {
                        ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
                                      inet->saddr);
                        ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
                                      inet->rcv_saddr);
                }

                return err;
        }

        if (!ipv6_addr_any(&np->rcv_saddr))
                saddr = &np->rcv_saddr;

        fl.proto = IPPROTO_TCP;
        ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
        ipv6_addr_copy(&fl.fl6_src,
                       (saddr ? saddr : &np->saddr));
        fl.oif = sk->sk_bound_dev_if;
        fl.fl_ip_dport = usin->sin6_port;
        fl.fl_ip_sport = inet->sport;

        /* With a routing header, route via the first intermediate hop
         * and remember the true destination in 'final'. */
        if (np->opt && np->opt->srcrt) {
                struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
                ipv6_addr_copy(&final, &fl.fl6_dst);
                ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
                final_p = &final;
        }

        err = ip6_dst_lookup(sk, &dst, &fl);
        if (err)
                goto failure;
        if (final_p)
                ipv6_addr_copy(&fl.fl6_dst, final_p);

        if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
                goto failure;

        /* Source not bound yet: take the one route lookup selected. */
        if (saddr == NULL) {
                saddr = &fl.fl6_src;
                ipv6_addr_copy(&np->rcv_saddr, saddr);
        }

        /* set the source address */
        ipv6_addr_copy(&np->saddr, saddr);
        inet->rcv_saddr = LOOPBACK4_IPV6;

        ip6_dst_store(sk, dst, NULL);
        sk->sk_route_caps = dst->dev->features &
                ~(NETIF_F_IP_CSUM | NETIF_F_TSO);

        tp->ext_header_len = 0;
        if (np->opt)
                tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;

        tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

        inet->dport = usin->sin6_port;

        tcp_set_state(sk, TCP_SYN_SENT);
        err = tcp_v6_hash_connect(sk);
        if (err)
                goto late_failure;

        if (!tp->write_seq)
                tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
                                                             np->daddr.s6_addr32,
                                                             inet->sport,
                                                             inet->dport);

        err = tcp_connect(sk);
        if (err)
                goto late_failure;

        return 0;

late_failure:
        tcp_set_state(sk, TCP_CLOSE);
        __sk_dst_reset(sk);
failure:
        inet->dport = 0;
        sk->sk_route_caps = 0;
        return err;
}
477
/*
 * ICMPv6 error handler for TCP.
 *
 * Locates the socket the errored segment belonged to and reacts by
 * state: PKT_TOOBIG triggers path-MTU adjustment and simple
 * retransmit; errors for LISTEN sockets may drop a pending request
 * sock; errors in SYN_SENT/SYN_RECV abort the connection; otherwise
 * the error is reported to the application (hard via sk_err if
 * IPV6_RECVERR is set, soft via sk_err_soft if not).
 */
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                int type, int code, int offset, __u32 info)
{
        struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
        const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
        struct ipv6_pinfo *np;
        struct sock *sk;
        int err;
        struct tcp_sock *tp; 
        __u32 seq;

        sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
                          th->source, skb->dev->ifindex);

        if (sk == NULL) {
                ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
                return;
        }

        /* TIME-WAIT sockets ignore ICMP errors entirely. */
        if (sk->sk_state == TCP_TIME_WAIT) {
                inet_twsk_put((struct inet_timewait_sock *)sk);
                return;
        }

        bh_lock_sock(sk);
        /* Socket busy in process context: count it, but still try
         * what can safely be done below. */
        if (sock_owned_by_user(sk))
                NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

        if (sk->sk_state == TCP_CLOSE)
                goto out;

        /* Ignore errors whose echoed sequence lies outside the
         * unacknowledged window (likely stale or spoofed). */
        tp = tcp_sk(sk);
        seq = ntohl(th->seq); 
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, tp->snd_una, tp->snd_nxt)) {
                NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        np = inet6_sk(sk);

        if (type == ICMPV6_PKT_TOOBIG) {
                struct dst_entry *dst = NULL;

                if (sock_owned_by_user(sk))
                        goto out;
                if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
                        goto out;

                /* icmp should have updated the destination cache entry */
                dst = __sk_dst_check(sk, np->dst_cookie);

                if (dst == NULL) {
                        struct inet_sock *inet = inet_sk(sk);
                        struct flowi fl;

                        /* BUGGG_FUTURE: Again, it is not clear how
                           to handle rthdr case. Ignore this complexity
                           for now.
                         */
                        memset(&fl, 0, sizeof(fl));
                        fl.proto = IPPROTO_TCP;
                        ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
                        ipv6_addr_copy(&fl.fl6_src, &np->saddr);
                        fl.oif = sk->sk_bound_dev_if;
                        fl.fl_ip_dport = inet->dport;
                        fl.fl_ip_sport = inet->sport;

                        if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
                                sk->sk_err_soft = -err;
                                goto out;
                        }

                        if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
                                sk->sk_err_soft = -err;
                                goto out;
                        }

                } else
                        dst_hold(dst);

                /* Shrink the MSS and retransmit what no longer fits. */
                if (tp->pmtu_cookie > dst_mtu(dst)) {
                        tcp_sync_mss(sk, dst_mtu(dst));
                        tcp_simple_retransmit(sk);
                } /* else let the usual retransmit timer handle it */
                dst_release(dst);
                goto out;
        }

        icmpv6_err_convert(type, code, &err);

        /* Might be for an request_sock */
        switch (sk->sk_state) {
                struct request_sock *req, **prev;
        case TCP_LISTEN:
                if (sock_owned_by_user(sk))
                        goto out;

                req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
                                           &hdr->saddr, inet6_iif(skb));
                if (!req)
                        goto out;

                /* ICMPs are not backlogged, hence we cannot get
                 * an established socket here.
                 */
                BUG_TRAP(req->sk == NULL);

                if (seq != tcp_rsk(req)->snt_isn) {
                        NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
                        goto out;
                }

                inet_csk_reqsk_queue_drop(sk, req, prev);
                goto out;

        case TCP_SYN_SENT:
        case TCP_SYN_RECV:  /* Cannot happen.
                               It can, it SYNs are crossed. --ANK */ 
                if (!sock_owned_by_user(sk)) {
                        TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
                        sk->sk_err = err;
                        sk->sk_error_report(sk);                /* Wake people up to see the error (see connect in sock.c) */

                        tcp_done(sk);
                } else
                        sk->sk_err_soft = err;
                goto out;
        }

        if (!sock_owned_by_user(sk) && np->recverr) {
                sk->sk_err = err;
                sk->sk_error_report(sk);
        } else
                sk->sk_err_soft = err;

out:
        bh_unlock_sock(sk);
        sock_put(sk);
}
618
619
/*
 * Build and transmit a SYN+ACK in response to a connection request.
 *
 * If @dst is NULL, resolves route/xfrm here, optionally deriving the
 * option set (and a final destination through a routing header) from
 * the listener's options or the inverted received routing header.
 * Any option block allocated locally (opt != np->opt) is freed before
 * returning. Returns 0 on success, negative errno otherwise.
 */
static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
                              struct dst_entry *dst)
{
        struct inet6_request_sock *treq = inet6_rsk(req);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct sk_buff * skb;
        struct ipv6_txoptions *opt = NULL;
        struct in6_addr * final_p = NULL, final;
        struct flowi fl;
        int err = -1;

        memset(&fl, 0, sizeof(fl));
        fl.proto = IPPROTO_TCP;
        ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
        ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
        fl.fl6_flowlabel = 0;
        fl.oif = treq->iif;
        fl.fl_ip_dport = inet_rsk(req)->rmt_port;
        fl.fl_ip_sport = inet_sk(sk)->sport;

        if (dst == NULL) {
                opt = np->opt;
                /* rxopt.bits.osrcrt == 2: reply along the reversed
                 * source route found in the received SYN. */
                if (opt == NULL &&
                    np->rxopt.bits.osrcrt == 2 &&
                    treq->pktopts) {
                        struct sk_buff *pktopts = treq->pktopts;
                        struct inet6_skb_parm *rxopt = IP6CB(pktopts);
                        if (rxopt->srcrt)
                                opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
                }

                if (opt && opt->srcrt) {
                        struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
                        ipv6_addr_copy(&final, &fl.fl6_dst);
                        ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
                        final_p = &final;
                }

                err = ip6_dst_lookup(sk, &dst, &fl);
                if (err)
                        goto done;
                if (final_p)
                        ipv6_addr_copy(&fl.fl6_dst, final_p);
                if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
                        goto done;
        }

        skb = tcp_make_synack(sk, dst, req);
        if (skb) {
                struct tcphdr *th = skb->h.th;

                th->check = tcp_v6_check(th, skb->len,
                                         &treq->loc_addr, &treq->rmt_addr,
                                         csum_partial((char *)th, skb->len, skb->csum));

                /* Restore the true peer address for transmission. */
                ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
                err = ip6_xmit(sk, skb, &fl, opt, 0);
                if (err == NET_XMIT_CN)
                        err = 0;
        }

done:
        /* Free only an option block we built here, never np->opt. */
        if (opt && opt != np->opt)
                sock_kfree_s(sk, opt, opt->tot_len);
        return err;
}
686
687 static void tcp_v6_reqsk_destructor(struct request_sock *req)
688 {
689         if (inet6_rsk(req)->pktopts)
690                 kfree_skb(inet6_rsk(req)->pktopts);
691 }
692
/* Operations for IPv6 TCP request socks (embryonic connections). */
static struct request_sock_ops tcp6_request_sock_ops = {
        .family         =       AF_INET6,
        .obj_size       =       sizeof(struct tcp6_request_sock),
        .rtx_syn_ack    =       tcp_v6_send_synack,     /* (re)send SYN+ACK */
        .send_ack       =       tcp_v6_reqsk_send_ack,
        .destructor     =       tcp_v6_reqsk_destructor,
        .send_reset     =       tcp_v6_send_reset
};
701
/* Timewait-sock operations: object size for allocation plus the
 * uniqueness test used when recycling TIME-WAIT connections. */
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
        .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
        .twsk_unique    = tcp_twsk_unique,
};
706
707 static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
708 {
709         struct ipv6_pinfo *np = inet6_sk(sk);
710         struct tcphdr *th = skb->h.th;
711
712         if (skb->ip_summed == CHECKSUM_HW) {
713                 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
714                 skb->csum = offsetof(struct tcphdr, check);
715         } else {
716                 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 
717                                             csum_partial((char *)th, th->doff<<2, 
718                                                          skb->csum));
719         }
720 }
721
722
/*
 * Send a RST in reply to the given segment (no socket context).
 *
 * Never resets a RST (avoids RST wars) and only replies to unicast
 * destinations. The reply swaps addresses/ports from the offending
 * segment; its sequence/ack numbers follow RFC 793 reset-generation
 * rules (use the peer's ack_seq if it ACKed, otherwise ACK what the
 * peer sent).
 */
static void tcp_v6_send_reset(struct sk_buff *skb)
{
        struct tcphdr *th = skb->h.th, *t1; 
        struct sk_buff *buff;
        struct flowi fl;

        if (th->rst)
                return;

        if (!ipv6_unicast_destination(skb))
                return; 

        /*
         * We need to grab some memory, and put together an RST,
         * and then put it into the queue to be sent.
         */

        buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
                         GFP_ATOMIC);
        if (buff == NULL) 
                return;

        skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));

        t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = sizeof(*t1)/4;
        t1->rst = 1;
  
        if(th->ack) {
                /* Peer ACKed: the RST carries that sequence number. */
                t1->seq = th->ack_seq;
        } else {
                /* Otherwise ACK everything the peer sent. */
                t1->ack = 1;
                t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
                                    + skb->len - (th->doff<<2));
        }

        buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

        /* Route back to the sender: swap the IP addresses too. */
        memset(&fl, 0, sizeof(fl));
        ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
        ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

        t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
                                    sizeof(*t1), IPPROTO_TCP,
                                    buff->csum);

        fl.proto = IPPROTO_TCP;
        fl.oif = inet6_iif(skb);
        fl.fl_ip_dport = t1->dest;
        fl.fl_ip_sport = t1->source;

        /* sk = NULL, but it is safe for now. RST socket required. */
        if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {

                if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
                        ip6_xmit(NULL, buff, &fl, NULL, 0);
                        TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
                        TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
                        return;
                }
        }

        /* Route or policy lookup failed: drop the reply. */
        kfree_skb(buff);
}
792
/*
 * Send a bare ACK in reply to @skb without socket context, used for
 * TIME-WAIT and request-sock ACKs. If @ts is non-zero a TCP timestamp
 * option (padded with two NOPs to 12 bytes) is appended, echoing @ts.
 */
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
        struct tcphdr *th = skb->h.th, *t1;
        struct sk_buff *buff;
        struct flowi fl;
        int tot_len = sizeof(struct tcphdr);

        /* Timestamp option: NOP + NOP + kind/len + 2 x 4-byte values. */
        if (ts)
                tot_len += 3*4;

        buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
                         GFP_ATOMIC);
        if (buff == NULL)
                return;

        skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

        t1 = (struct tcphdr *) skb_push(buff,tot_len);

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = tot_len/4;
        t1->seq = htonl(seq);
        t1->ack_seq = htonl(ack);
        t1->ack = 1;
        t1->window = htons(win);
        
        if (ts) {
                u32 *ptr = (u32*)(t1 + 1);
                *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                               (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
                *ptr++ = htonl(tcp_time_stamp);
                *ptr = htonl(ts);       /* echo the peer's timestamp */
        }

        buff->csum = csum_partial((char *)t1, tot_len, 0);

        /* Route back to the sender: swap the IP addresses. */
        memset(&fl, 0, sizeof(fl));
        ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
        ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

        t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
                                    tot_len, IPPROTO_TCP,
                                    buff->csum);

        fl.proto = IPPROTO_TCP;
        fl.oif = inet6_iif(skb);
        fl.fl_ip_dport = t1->dest;
        fl.fl_ip_sport = t1->source;

        if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
                if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
                        ip6_xmit(NULL, buff, &fl, NULL, 0);
                        TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
                        return;
                }
        }

        /* Route or policy lookup failed: drop the reply. */
        kfree_skb(buff);
}
855
856 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
857 {
858         struct inet_timewait_sock *tw = inet_twsk(sk);
859         const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
860
861         tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
862                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
863                         tcptw->tw_ts_recent);
864
865         inet_twsk_put(tw);
866 }
867
868 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
869 {
870         tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
871 }
872
873
/*
 * For a LISTEN socket, map an incoming segment onto the socket that
 * should process it: a pending request_sock (handled via
 * tcp_check_req()), an already-established child (returned with its
 * bh lock held), or the listener itself.  Returns NULL when the
 * segment must be discarded.
 */
static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
{
	struct request_sock *req, **prev;
	const struct tcphdr *th = skb->h.th;
	struct sock *nsk;

	/* Find possible connection requests. */
	req = inet6_csk_search_req(sk, &prev, th->source,
				   &skb->nh.ipv6h->saddr,
				   &skb->nh.ipv6h->daddr, inet6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
					 th->source, &skb->nh.ipv6h->daddr,
					 ntohs(th->dest), inet6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			/* Hand the child back locked; caller unlocks. */
			bh_lock_sock(nsk);
			return nsk;
		}
		/* TIME_WAIT hit: drop the lookup reference and discard. */
		inet_twsk_put((struct inet_timewait_sock *)nsk);
		return NULL;
	}

#if 0 /*def CONFIG_SYN_COOKIES*/
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}
906
907 /* FIXME: this is substantially similar to the ipv4 code.
908  * Can some kind of merge be done? -- erics
909  */
/*
 * Handle an incoming SYN on a listening socket: allocate a
 * request_sock, hash it into the listener's SYN queue and answer with
 * a SYN|ACK.  v6-mapped IPv4 SYNs are redirected to
 * tcp_v4_conn_request().  Always returns 0 — the caller is never
 * asked to send a reset.
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct inet6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock *req = NULL;
	__u32 isn = TCP_SKB_CB(skb)->when;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	/*
	 *	There are no SYN attacks on IPv6, yet...
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		if (net_ratelimit())
			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
		goto drop;
	}

	/* Accept queue full and too many young embryonic requests: drop. */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
	if (req == NULL)
		goto drop;

	tcp_clear_options(&tmp_opt);
	/* Clamp MSS so our SYN|ACK payload fits the IPv6 minimum MTU. */
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = inet6_rsk(req);
	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
	TCP_ECN_create_request(req, skb->h.th);
	treq->pktopts = NULL;
	/* Keep the SYN skb around (extra ref) if the user wants pktoptions. */
	if (ipv6_opt_accepted(sk, skb) ||
	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
		atomic_inc(&skb->users);
		treq->pktopts = skb;
	}
	treq->iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
		treq->iif = inet6_iif(skb);

	/* isn != 0 means the ISN was supplied by the caller (TCP_SKB_CB when). */
	if (isn == 0)
		isn = tcp_v6_init_sequence(sk,skb);

	tcp_rsk(req)->snt_isn = isn;

	if (tcp_v6_send_synack(sk, req, NULL))
		goto drop;

	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop:
	/* reqsk_free() also drops any pktopts reference via the destructor. */
	if (req)
		reqsk_free(req);

	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	return 0; /* don't send reset */
}
986
/*
 * Create the child socket for an accepted connection request.
 * Handles two cases: a v6-mapped IPv4 connection (delegated to
 * tcp_v4_syn_recv_sock(), then retargeted to the mapped ops), and a
 * native IPv6 connection (route resolved if needed, child created and
 * its IPv6 state cloned from the listener).  Returns the new socket,
 * or NULL on failure (overflow or allocation/route error).
 */
static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst)
{
	struct inet6_request_sock *treq = inet6_rsk(req);
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
	struct ipv6_txoptions *opt;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		if (newsk == NULL)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		/* Build ::ffff:a.b.c.d mapped addresses from the v4 ones. */
		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
			      newinet->daddr);

		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
			      newinet->saddr);

		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet6_iif(skb);
		newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, newtp->pmtu_cookie);

		return newsk;
	}

	opt = np->opt;

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	/* osrcrt == 2: derive reply source routing from the SYN's rthdr. */
	if (np->rxopt.bits.osrcrt == 2 &&
	    opt == NULL && treq->pktopts) {
		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
		if (rxopt->srcrt)
			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
	}

	if (dst == NULL) {
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		/* With a source route, look up via the first hop and keep
		 * the real destination for after the route is resolved. */
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}
		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
		fl.fl_ip_sport = inet_sk(sk)->sport;

		if (ip6_dst_lookup(sk, &dst, &fl))
			goto out;

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	ip6_dst_store(newsk, dst, NULL);
	newsk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
	newsk->sk_bound_dev_if = treq->iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->opt = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (treq->pktopts != NULL) {
		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
		kfree_skb(treq->pktopts);
		treq->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->opt	  = NULL;
	newnp->mcast_oif  = inet6_iif(skb);
	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we make one more one thing there: reattach optmem
	   to newsk.
	 */
	if (opt) {
		newnp->opt = ipv6_dup_options(newsk, opt);
		/* If opt was built by ipv6_invert_rthdr() above it is a
		 * temporary copy owned by us — free it now. */
		if (opt != np->opt)
			sock_kfree_s(sk, opt, opt->tot_len);
	}

	newtp->ext_header_len = 0;
	if (newnp->opt)
		newtp->ext_header_len = newnp->opt->opt_nflen +
					newnp->opt->opt_flen;

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

	__inet6_hash(&tcp_hashinfo, newsk);
	inet_inherit_port(&tcp_hashinfo, sk, newsk);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return NULL;
}
1175
/*
 * Validate/prepare the TCP checksum of an incoming IPv6 segment.
 * With hardware checksumming (CHECKSUM_HW) verify it directly and
 * mark the skb CHECKSUM_UNNECESSARY on success; otherwise seed
 * skb->csum with the pseudo-header sum and, for short packets,
 * complete the check in software immediately.  Returns non-zero if
 * the packet is corrupt.
 */
static int tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_HW) {
		if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr,skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	/* Pseudo-header seed; payload is folded in later (or just below). */
	skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr, 0);

	/* NOTE(review): 76 looks like the same short-packet threshold the
	 * IPv4 path uses — verify against tcp_v4_checksum_init. */
	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}
1194
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 *
 * Returns 0 on success; the skb is always consumed (queued or freed).
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	if (sk_filter(sk, skb, 0))
		goto discard;

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
			goto reset;
		TCP_CHECK_TIMER(sk);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket..
		 */
		if(nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
		goto reset;
	TCP_CHECK_TIMER(sk);
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = inet6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
		if (ipv6_opt_accepted(sk, opt_skb)) {
			/* Latch this packet's options; free the previous. */
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	if (opt_skb)
		kfree_skb(opt_skb);
	return 0;
}
1324
/*
 * Main softirq entry point for incoming IPv6 TCP segments: validate
 * header and checksum, look up the owning socket and either process
 * the segment directly, queue it on the prequeue/backlog, or handle
 * the no-socket and TIME_WAIT cases.
 */
static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
{
	struct sk_buff *skb = *pskb;
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	if (th->doff < sizeof(struct tcphdr)/4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v6_checksum_init(skb)))
		goto bad_packet;

	/* pskb_may_pull() may have reallocated; reload the header pointer. */
	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
			    &skb->nh.ipv6h->daddr, ntohs(th->dest),
			    inet6_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb, 0))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		/* tcp_prequeue() returns 0 if it did not take the skb. */
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v6_do_rcv(sk, skb);
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(skb);
	}

discard_it:

	/*
	 *	Discard frame
	 */

	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		goto discard_it;
	}

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		inet_twsk_put((struct inet_timewait_sock *)sk);
		goto discard_it;
	}

	switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
					   skb, th)) {
	case TCP_TW_SYN:
	{
		/* New SYN on a TIME_WAIT pair: if a listener exists,
		 * retire the TIME_WAIT sock and restart processing. */
		struct sock *sk2;

		sk2 = inet6_lookup_listener(&tcp_hashinfo,
					    &skb->nh.ipv6h->daddr,
					    ntohs(th->dest), inet6_iif(skb));
		if (sk2 != NULL) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
1457
/*
 * Record peer timestamp state at close.  IPv6 has no peer cache yet,
 * so there is nothing to remember; report "not stored".
 */
static int tcp_v6_remember_stamp(struct sock *sk)
{
	return 0;
}
1463
/* Address-family operations for native IPv6 TCP sockets. */
static struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	=	inet6_csk_xmit,
	.send_check	=	tcp_v6_send_check,
	.rebuild_header	=	inet6_sk_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v6_remember_stamp,
	.net_header_len	=	sizeof(struct ipv6hdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	inet6_csk_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};
1478
/*
 *	TCP over IPv4 via INET6 API
 */

/* Address-family operations for v6-mapped IPv4 connections: transmit
 * and header handling use the IPv4 paths, socket options stay IPv6. */
static struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	=	ip_queue_xmit,
	.send_check	=	tcp_v4_send_check,
	.rebuild_header	=	inet_sk_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v4_remember_stamp,
	.net_header_len	=	sizeof(struct iphdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	inet6_csk_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};
1497
1498
1499
/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
/* Per-socket initialization for a new IPv6 TCP socket. */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;	/* conservative default MSS (RFC 1122) */

	tp->reordering = sysctl_tcp_reordering;

	sk->sk_state = TCP_CLOSE;

	icsk->icsk_af_ops = &ipv6_specific;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}
1545
/* Tear down the shared IPv4/TCP state first, then the IPv6 state. */
static int tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	return inet6_destroy_sock(sk);
}
1551
1552 /* Proc filesystem TCPv6 sock list dumping. */
/* Emit one /proc/net/tcp6 line for a pending connection request
 * (SYN_RECV entry on listener @sk). */
static void get_openreq6(struct seq_file *seq,
			 struct sock *sk, struct request_sock *req, int i, int uid)
{
	int ttd = req->expires - jiffies;	/* time until retransmit/expiry */
	struct in6_addr *src = &inet6_rsk(req)->loc_addr;
	struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_sk(sk)->sport),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->rmt_port),
		   TCP_SYN_RECV,
		   0,0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->retrans,
		   uid,
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}
1583
/* Emit one /proc/net/tcp6 line for an established or listening socket. */
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	struct inet_sock *inet = inet_sk(sp);
	struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	struct ipv6_pinfo *np = inet6_sk(sp);

	dest  = &np->daddr;
	src   = &np->rcv_saddr;
	destp = ntohs(inet->dport);
	srcp  = ntohs(inet->sport);

	/* Encode which timer is pending: 1=retransmit, 4=zero-window probe,
	 * 2=keepalive, 0=none (these codes are part of the proc format). */
	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   sp->sk_state,
		   tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
		   timer_active,
		   jiffies_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   sock_i_uid(sp),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   atomic_read(&sp->sk_refcnt), sp,
		   icsk->icsk_rto,
		   icsk->icsk_ack.ato,
		   (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
		   tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
		   );
}
1637
/* Emit one /proc/net/tcp6 line for a TIME_WAIT socket. */
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
	int ttd = tw->tw_ttd - jiffies;	/* time until the tw sock dies */

	if (ttd < 0)
		ttd = 0;

	dest = &tw6->tw_v6_daddr;
	src  = &tw6->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);
}
1666
1667 #ifdef CONFIG_PROC_FS
1668 static int tcp6_seq_show(struct seq_file *seq, void *v)
1669 {
1670         struct tcp_iter_state *st;
1671
1672         if (v == SEQ_START_TOKEN) {
1673                 seq_puts(seq,
1674                          "  sl  "
1675                          "local_address                         "
1676                          "remote_address                        "
1677                          "st tx_queue rx_queue tr tm->when retrnsmt"
1678                          "   uid  timeout inode\n");
1679                 goto out;
1680         }
1681         st = seq->private;
1682
1683         switch (st->state) {
1684         case TCP_SEQ_STATE_LISTENING:
1685         case TCP_SEQ_STATE_ESTABLISHED:
1686                 get_tcp6_sock(seq, v, st->num);
1687                 break;
1688         case TCP_SEQ_STATE_OPENREQ:
1689                 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
1690                 break;
1691         case TCP_SEQ_STATE_TIME_WAIT:
1692                 get_timewait6_sock(seq, v, st->num);
1693                 break;
1694         }
1695 out:
1696         return 0;
1697 }
1698
/* Filled in by tcp_proc_register() via the afinfo below. */
static struct file_operations tcp6_seq_fops;
/* AF_INET6 descriptor handed to the shared TCP /proc machinery. */
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
        .owner          = THIS_MODULE,
        .name           = "tcp6",               /* /proc/net/tcp6 */
        .family         = AF_INET6,
        .seq_show       = tcp6_seq_show,
        .seq_fops       = &tcp6_seq_fops,
};
1707
/* Register /proc/net/tcp6; returns 0 or a negative errno from the core. */
int __init tcp6_proc_init(void)
{
        return tcp_proc_register(&tcp6_seq_afinfo);
}
1712
/* Remove /proc/net/tcp6; counterpart of tcp6_proc_init(). */
void tcp6_proc_exit(void)
{
        tcp_proc_unregister(&tcp6_seq_afinfo);
}
1717 #endif
1718
/*
 * TCP protocol operations for AF_INET6 sockets.  Most handlers are the
 * generic TCP ones shared with IPv4; only the address-family specific
 * entry points (connect, backlog_rcv, hash, get_port, sock init/destroy)
 * are v6-specific.  Memory accounting state is shared with IPv4 TCP.
 */
struct proto tcpv6_prot = {
        .name                   = "TCPv6",
        .owner                  = THIS_MODULE,
        .close                  = tcp_close,
        .connect                = tcp_v6_connect,
        .disconnect             = tcp_disconnect,
        .accept                 = inet_csk_accept,
        .ioctl                  = tcp_ioctl,
        .init                   = tcp_v6_init_sock,
        .destroy                = tcp_v6_destroy_sock,
        .shutdown               = tcp_shutdown,
        .setsockopt             = tcp_setsockopt,
        .getsockopt             = tcp_getsockopt,
        .sendmsg                = tcp_sendmsg,
        .recvmsg                = tcp_recvmsg,
        .backlog_rcv            = tcp_v6_do_rcv,
        .hash                   = tcp_v6_hash,
        .unhash                 = tcp_unhash,
        .get_port               = tcp_v6_get_port,
        .enter_memory_pressure  = tcp_enter_memory_pressure,
        .sockets_allocated      = &tcp_sockets_allocated,
        .memory_allocated       = &tcp_memory_allocated,
        .memory_pressure        = &tcp_memory_pressure,
        .orphan_count           = &tcp_orphan_count,
        .sysctl_mem             = sysctl_tcp_mem,
        .sysctl_wmem            = sysctl_tcp_wmem,
        .sysctl_rmem            = sysctl_tcp_rmem,
        .max_header             = MAX_TCP_HEADER,
        .obj_size               = sizeof(struct tcp6_sock), /* per-socket alloc size */
        .twsk_prot              = &tcp6_timewait_sock_ops,  /* TIME_WAIT sock ops */
        .rsk_prot               = &tcp6_request_sock_ops,   /* SYN_RECV request ops */
};
1751
/* IPv6 transport-layer hook for protocol number IPPROTO_TCP. */
static struct inet6_protocol tcpv6_protocol = {
        .handler        =       tcp_v6_rcv,
        .err_handler    =       tcp_v6_err,     /* ICMPv6 error delivery */
        .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
1757
/* socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP) switch entry. */
static struct inet_protosw tcpv6_protosw = {
        .type           =       SOCK_STREAM,
        .protocol       =       IPPROTO_TCP,
        .prot           =       &tcpv6_prot,
        .ops            =       &inet6_stream_ops,
        .capability     =       -1,     /* no capability required */
        .no_check       =       0,      /* checksums mandatory */
        .flags          =       INET_PROTOSW_PERMANENT,
};
1767
/*
 * Boot-time registration of TCP with the IPv6 stack.
 * NOTE(review): if inet6_add_protocol() fails we only printk and still
 * register the protosw entry — there is no error path here; confirm
 * callers cannot reach TCP sockets in that state.
 */
void __init tcpv6_init(void)
{
        /* register inet6 protocol */
        if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
                printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
        inet6_register_protosw(&tcpv6_protosw);
}