3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 #include <linux/capability.h>
15 #include <linux/errno.h>
16 #include <linux/types.h>
17 #include <linux/kernel.h>
18 #include <linux/interrupt.h>
19 #include <linux/socket.h>
20 #include <linux/sockios.h>
21 #include <linux/in6.h>
22 #include <linux/ipv6.h>
23 #include <linux/route.h>
24 #include <linux/slab.h>
27 #include <net/ndisc.h>
28 #include <net/addrconf.h>
29 #include <net/transp_v6.h>
30 #include <net/ip6_route.h>
31 #include <net/tcp_states.h>
33 #include <linux/errqueue.h>
34 #include <asm/uaccess.h>
/*
 * ip6_datagram_connect - set the peer address of an IPv6 datagram socket.
 * @sk:       socket being connected
 * @uaddr:    caller-supplied destination, read as a struct sockaddr_in6
 * @addr_len: length of @uaddr in bytes
 *
 * AF_INET destinations and IPv4-mapped IPv6 destinations are passed to
 * ip4_datagram_connect().  For any other AF_INET6 destination the function
 * records destination address, flow label and port on the socket, performs
 * a route lookup (ip6_dst_lookup() followed by __xfrm_lookup()), stores the
 * resulting dst with ip6_dst_store() and sets sk->sk_state to
 * TCP_ESTABLISHED.
 *
 * NOTE(review): this copy of the function is missing lines (braces, the
 * error returns/gotos, and the declarations of 'fl', 'addr_type' and 'err').
 * The comments below describe only the statements that are visible; confirm
 * error handling and return values against a complete copy.
 */
36 int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
38 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
39 struct inet_sock *inet = inet_sk(sk);
40 struct ipv6_pinfo *np = inet6_sk(sk);
41 struct in6_addr *daddr, *final_p = NULL, final;
42 struct dst_entry *dst;
44 struct ip6_flowlabel *flowlabel = NULL;
/*
 * Address family AF_INET: after an IPv6-only check on the socket, the
 * request is delegated to the IPv4 connect routine with the caller's
 * arguments unchanged.
 */
48 if (usin->sin6_family == AF_INET) {
49 if (__ipv6_only_sock(sk))
51 err = ip4_datagram_connect(sk, uaddr, addr_len);
/*
 * From here on the address must be at least SIN6_LEN_RFC2133 bytes long
 * and of family AF_INET6.
 */
55 if (addr_len < SIN6_LEN_RFC2133)
58 if (usin->sin6_family != AF_INET6)
/* Start from an all-zero flow key. */
61 memset(&fl, 0, sizeof(fl));
/*
 * Take the flow info from the caller's address.  If it carries a flow
 * label, look the label up on this socket and overwrite the caller's
 * address with the destination stored in the label.
 */
63 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
64 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
65 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
66 if (flowlabel == NULL)
68 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
72 addr_type = ipv6_addr_type(&usin->sin6_addr);
/*
 * A destination of type IPV6_ADDR_ANY gets its last byte set to 0x01
 * (presumably turning :: into the loopback address ::1).
 */
74 if (addr_type == IPV6_ADDR_ANY) {
78 usin->sin6_addr.s6_addr[15] = 0x01;
81 daddr = &usin->sin6_addr;
/*
 * IPv4-mapped destination: build a sockaddr_in from the last 32-bit word
 * of the address and the caller's port, connect through the IPv4 routine,
 * then mirror the resulting IPv4 addresses into the IPv6 socket fields as
 * v4-mapped addresses (source fields only when they are still unset).
 */
83 if (addr_type == IPV6_ADDR_MAPPED) {
84 struct sockaddr_in sin;
86 if (__ipv6_only_sock(sk)) {
90 sin.sin_family = AF_INET;
91 sin.sin_addr.s_addr = daddr->s6_addr32[3];
92 sin.sin_port = usin->sin6_port;
94 err = ip4_datagram_connect(sk,
95 (struct sockaddr*) &sin,
102 ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr);
104 if (ipv6_addr_any(&np->saddr))
105 ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
107 if (ipv6_addr_any(&np->rcv_saddr))
108 ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
/*
 * Link-local destination: sin6_scope_id is only read when addr_len covers
 * a full struct sockaddr_in6.  A non-zero scope id is compared with an
 * existing device binding and then becomes the bound interface.
 * NOTE(review): the outcome of a mismatch is not visible in this copy.
 */
114 if (addr_type&IPV6_ADDR_LINKLOCAL) {
115 if (addr_len >= sizeof(struct sockaddr_in6) &&
116 usin->sin6_scope_id) {
117 if (sk->sk_bound_dev_if &&
118 sk->sk_bound_dev_if != usin->sin6_scope_id) {
122 sk->sk_bound_dev_if = usin->sin6_scope_id;
/* Still unbound and multicast: use the socket's multicast interface. */
125 if (!sk->sk_bound_dev_if && (addr_type & IPV6_ADDR_MULTICAST))
126 sk->sk_bound_dev_if = np->mcast_oif;
128 /* Connect to link-local address requires an interface */
129 if (!sk->sk_bound_dev_if) {
/* Record the peer (address, flow label, port) on the socket. */
135 ipv6_addr_copy(&np->daddr, daddr);
136 np->flow_label = fl.fl6_flowlabel;
138 inet->inet_dport = usin->sin6_port;
141 * Check for a route to destination an obtain the
142 * destination cache for it.
/* Fill the flow key used for the route lookup from the socket state. */
145 fl.proto = sk->sk_protocol;
146 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
147 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
148 fl.oif = sk->sk_bound_dev_if;
149 fl.mark = sk->sk_mark;
150 fl.fl_ip_dport = inet->inet_dport;
151 fl.fl_ip_sport = inet->inet_sport;
/* No output interface chosen and multicast: fall back to mcast_oif. */
153 if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST))
154 fl.oif = np->mcast_oif;
156 security_sk_classify_flow(sk, &fl);
/*
 * Source routing: when a routing header is configured (in the flow
 * label's options, otherwise in the socket's options), save the real
 * destination in 'final' and route towards rt0->addr instead.
 * NOTE(review): 'flowlabel' is initialised to NULL above and no NULL
 * guard in front of the first test is visible in this copy - confirm
 * that one exists in the complete source.
 */
159 if (flowlabel->opt && flowlabel->opt->srcrt) {
160 struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt;
161 ipv6_addr_copy(&final, &fl.fl6_dst);
162 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
165 } else if (np->opt && np->opt->srcrt) {
166 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
167 ipv6_addr_copy(&final, &fl.fl6_dst);
168 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
/* Route lookup for the flow; the result is returned through 'dst'. */
172 err = ip6_dst_lookup(sk, &dst, &fl);
/*
 * Put the address behind final_p back into the flow key before the xfrm
 * lookup.  NOTE(review): final_p starts as NULL; the statement that sets
 * it and any guard around this copy are not visible here.
 */
176 ipv6_addr_copy(&fl.fl6_dst, final_p);
178 err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
/* NOTE(review): the condition selecting the blackhole path is not visible. */
181 err = ip6_dst_blackhole(sk, &dst, &fl);
186 /* source address lookup done in ip6_dst_lookup */
/* Adopt the flow's source address where the socket has none yet. */
188 if (ipv6_addr_any(&np->saddr))
189 ipv6_addr_copy(&np->saddr, &fl.fl6_src);
191 if (ipv6_addr_any(&np->rcv_saddr)) {
192 ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src);
193 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
/*
 * Store the route on the socket.  The remaining arguments (partly missing
 * in this copy) are chosen by comparing the flow's destination and, with
 * CONFIG_IPV6_SUBTREES, its source against the socket's addresses.
 */
196 ip6_dst_store(sk, dst,
197 ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
199 #ifdef CONFIG_IPV6_SUBTREES
200 ipv6_addr_equal(&fl.fl6_src, &np->saddr) ?
/* The connected state of the datagram socket is recorded as TCP_ESTABLISHED. */
205 sk->sk_state = TCP_ESTABLISHED;
/* Release the flow label obtained from fl6_sock_lookup() above. */
207 fl6_sock_release(flowlabel);
/*
 * ipv6_icmp_error - queue an ICMPv6-originated error on a socket.
 * @sk:      socket the error is reported to
 * @skb:     buffer containing the ICMPv6 message; a GFP_ATOMIC clone of it
 *           is what gets queued
 * @err:     errno value stored in ee_errno
 * @port:    NOTE(review): the statement consuming @port is not visible in
 *           this copy
 * @info:    value stored in ee_info
 * @payload: pointer into the skb data; the clone's data pointer is advanced
 *           to it before queueing
 *
 * The extended-error control block of the clone is filled with origin
 * SO_EE_ORIGIN_ICMP6 and the ICMPv6 type and code, and the clone is handed
 * to sock_queue_err_skb().
 *
 * NOTE(review): lines are missing from this copy (braces, early returns,
 * failure handling); only the visible statements are described.
 */
211 void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
212 __be16 port, u32 info, u8 *payload)
214 struct ipv6_pinfo *np = inet6_sk(sk);
215 struct icmp6hdr *icmph = icmp6_hdr(skb);
216 struct sock_exterr_skb *serr;
/* Work on a private clone; atomic allocation. */
221 skb = skb_clone(skb, GFP_ATOMIC);
/* Describe the error in the skb's extended-error control block. */
225 serr = SKB_EXT_ERR(skb);
226 serr->ee.ee_errno = err;
227 serr->ee.ee_origin = SO_EE_ORIGIN_ICMP6;
228 serr->ee.ee_type = icmph->icmp6_type;
229 serr->ee.ee_code = icmph->icmp6_code;
231 serr->ee.ee_info = info;
232 serr->ee.ee_data = 0;
/*
 * Offset, relative to the network header, of the destination address in
 * the IPv6 header that directly follows the ICMPv6 header.
 */
233 serr->addr_offset = (u8 *)&(((struct ipv6hdr *)(icmph + 1))->daddr) -
234 skb_network_header(skb);
/* Make the data start at @payload and mark that as the transport header. */
237 __skb_pull(skb, payload - skb->data);
238 skb_reset_transport_header(skb);
/* NOTE(review): the action taken when queueing fails is not visible here. */
240 if (sock_queue_err_skb(sk, skb))
/*
 * ipv6_local_error - queue a locally generated error on a socket.
 * @sk:   socket the error is reported to
 * @err:  errno value stored in ee_errno
 * @fl:   flow of the failed transmission; supplies the destination address
 *        and destination port that are reported
 * @info: value stored in ee_info
 *
 * Allocates a small skb holding just an IPv6 header, writes the flow's
 * destination into that header's daddr, fills the extended-error control
 * block with origin SO_EE_ORIGIN_LOCAL (type and code 0) and hands the skb
 * to sock_queue_err_skb().
 *
 * NOTE(review): lines are missing from this copy (declarations of 'skb'
 * and 'iph', the assignment of 'iph', early returns, failure handling).
 */
244 void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
246 struct ipv6_pinfo *np = inet6_sk(sk);
247 struct sock_exterr_skb *serr;
/* Room for exactly one IPv6 header; atomic allocation. */
254 skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
258 skb_put(skb, sizeof(struct ipv6hdr));
259 skb_reset_network_header(skb);
/* Of the header fields, only daddr is written in the visible code. */
261 ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
263 serr = SKB_EXT_ERR(skb);
264 serr->ee.ee_errno = err;
265 serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
266 serr->ee.ee_type = 0;
267 serr->ee.ee_code = 0;
269 serr->ee.ee_info = info;
270 serr->ee.ee_data = 0;
/* Offset of daddr from the network header, plus the flow's destination port. */
271 serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
272 serr->port = fl->fl_ip_dport;
/* Pull everything between data and tail, leaving no payload bytes. */
274 __skb_pull(skb, skb_tail_pointer(skb) - skb->data);
275 skb_reset_transport_header(skb);
/* NOTE(review): the action taken when queueing fails is not visible here. */
277 if (sock_queue_err_skb(sk, skb))
/*
 * ipv6_local_rxpmtu - record a path-MTU notification on a socket.
 * @sk:  socket to notify
 * @fl:  flow the MTU applies to; supplies destination address and oif
 * @mtu: the MTU value to report
 *
 * Tests the socket's rxopt.bits.rxpmtu option, builds an skb containing
 * only an IPv6 header, stores a struct ip6_mtuinfo describing @mtu and the
 * destination in the area returned by IP6CBMTU(), and swaps the skb into
 * np->rxpmtu with xchg().
 *
 * NOTE(review): lines are missing from this copy (declarations of 'skb'
 * and 'iph', early returns, and what happens to the skb that xchg()
 * returns).
 */
281 void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu)
283 struct ipv6_pinfo *np = inet6_sk(sk);
286 struct ip6_mtuinfo *mtu_info;
/* Option not enabled on this socket (the statement that follows is not visible). */
288 if (!np->rxopt.bits.rxpmtu)
/* Room for exactly one IPv6 header; atomic allocation. */
291 skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
295 skb_put(skb, sizeof(struct ipv6hdr));
296 skb_reset_network_header(skb);
298 ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
/* The MTU record is kept in the area IP6CBMTU() returns for this skb. */
300 mtu_info = IP6CBMTU(skb);
306 mtu_info->ip6m_mtu = mtu;
307 mtu_info->ip6m_addr.sin6_family = AF_INET6;
308 mtu_info->ip6m_addr.sin6_port = 0;
309 mtu_info->ip6m_addr.sin6_flowinfo = 0;
310 mtu_info->ip6m_addr.sin6_scope_id = fl->oif;
311 ipv6_addr_copy(&mtu_info->ip6m_addr.sin6_addr, &ipv6_hdr(skb)->daddr);
/* Pull everything between data and tail, leaving no payload bytes. */
313 __skb_pull(skb, skb_tail_pointer(skb) - skb->data);
314 skb_reset_transport_header(skb);
/* Install the new notification; 'skb' now holds whatever was stored before. */
316 skb = xchg(&np->rxpmtu, skb);
321 * Handle MSG_ERRQUEUE
/*
 * ipv6_recv_error - deliver one queued socket error to a receiver.
 * @sk:  socket whose error queue is read
 * @msg: message header to fill (payload, msg_name, control data, flags)
 * @len: NOTE(review): the statements using @len are not visible in this copy
 *
 * Dequeues one skb from sk->sk_error_queue, copies its data into the
 * caller's iovec, reports the address the failed packet was sent to in
 * msg_name, attaches an IPV6_RECVERR control message carrying the extended
 * error plus the offender's address, sets MSG_ERRQUEUE, and finally
 * refreshes sk->sk_err from the next queued error, if any.
 *
 * NOTE(review): lines are missing from this copy (braces, declarations of
 * 'err', 'copied' and the 'errhdr' aggregate, several conditions and the
 * return statements); only the visible statements are described.
 */
323 int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
325 struct ipv6_pinfo *np = inet6_sk(sk);
326 struct sock_exterr_skb *serr;
327 struct sk_buff *skb, *skb2;
328 struct sockaddr_in6 *sin;
/* The next two declarations are the members used below as errhdr.ee / errhdr.offender. */
330 struct sock_extended_err ee;
331 struct sockaddr_in6 offender;
/* Take one queued error skb off the socket's error queue. */
337 skb = skb_dequeue(&sk->sk_error_queue);
/* NOTE(review): the length test guarding this flag is not visible here. */
343 msg->msg_flags |= MSG_TRUNC;
346 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
350 sock_recv_timestamp(msg, sk, skb);
352 serr = SKB_EXT_ERR(skb);
/*
 * msg_name receives the address found at serr->addr_offset from the
 * network header (the offset recorded when the error was queued) and the
 * recorded port.
 */
354 sin = (struct sockaddr_in6 *)msg->msg_name;
356 const unsigned char *nh = skb_network_header(skb);
357 sin->sin6_family = AF_INET6;
358 sin->sin6_flowinfo = 0;
359 sin->sin6_port = serr->port;
360 sin->sin6_scope_id = 0;
/*
 * ICMPv6 origin: the address is an IPv6 address.  The 32-bit word located
 * 24 bytes before it is also read (presumably the first word of the IPv6
 * header, since daddr sits at offset 24 - confirm), and link-local
 * addresses get the incoming interface as scope id.
 */
361 if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
362 ipv6_addr_copy(&sin->sin6_addr,
363 (struct in6_addr *)(nh + serr->addr_offset));
366 (*(__be32 *)(nh + serr->addr_offset - 24) &
368 if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
369 sin->sin6_scope_id = IP6CB(skb)->iif;
/* Otherwise the 32-bit value at the offset is reported as a v4-mapped address. */
371 ipv6_addr_set_v4mapped(*(__be32 *)(nh + serr->addr_offset),
/*
 * Build the IPV6_RECVERR payload: a copy of the extended error followed
 * by the offender address.  The offender family stays AF_UNSPEC for
 * locally generated errors.
 */
376 memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
377 sin = &errhdr.offender;
378 sin->sin6_family = AF_UNSPEC;
379 if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) {
380 sin->sin6_family = AF_INET6;
381 sin->sin6_flowinfo = 0;
382 sin->sin6_scope_id = 0;
/*
 * ICMPv6 origin: offender is the IPv6 source of the queued packet, and
 * the socket's IPv6 ancillary data is appended via datagram_recv_ctl().
 */
383 if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
384 ipv6_addr_copy(&sin->sin6_addr, &ipv6_hdr(skb)->saddr);
386 datagram_recv_ctl(sk, msg, skb);
387 if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
388 sin->sin6_scope_id = IP6CB(skb)->iif;
/*
 * Other origins: offender is the IPv4 source address in v4-mapped form;
 * IPv4 ancillary data is appended when the socket has cmsg_flags set.
 */
390 struct inet_sock *inet = inet_sk(sk);
392 ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
394 if (inet->cmsg_flags)
395 ip_cmsg_recv(msg, skb);
399 put_cmsg(msg, SOL_IPV6, IPV6_RECVERR, sizeof(errhdr), &errhdr);
401 /* Now we could try to dump offended packet options */
403 msg->msg_flags |= MSG_ERRQUEUE;
406 /* Reset and regenerate socket error */
/*
 * Under the error-queue lock, look at the next queued error without
 * removing it.  If there is one, its errno becomes sk->sk_err and, after
 * dropping the lock, the socket's error-report callback is invoked.
 * NOTE(review): the statement clearing sk->sk_err is not visible here.
 */
407 spin_lock_bh(&sk->sk_error_queue.lock);
409 if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
410 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
411 spin_unlock_bh(&sk->sk_error_queue.lock);
412 sk->sk_error_report(sk);
414 spin_unlock_bh(&sk->sk_error_queue.lock);
424 * Handle IPV6_RECVPATHMTU
/*
 * ipv6_recv_rxpmtu - deliver the pending path-MTU notification of a socket.
 * @sk:  socket whose np->rxpmtu slot is read
 * @msg: message header to fill (payload, msg_name, control data, flags)
 * @len: NOTE(review): the statements using @len are not visible in this copy
 *
 * Atomically takes the skb out of np->rxpmtu (leaving NULL behind), copies
 * its data to the caller, copies the struct ip6_mtuinfo stored in the skb,
 * reports the recorded address in msg_name and attaches the MTU record as
 * an IPV6_PATHMTU control message.
 *
 * NOTE(review): lines are missing from this copy (declarations of 'skb',
 * 'err' and 'copied', conditions, cleanup and return statements).
 */
426 int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len)
428 struct ipv6_pinfo *np = inet6_sk(sk);
430 struct sockaddr_in6 *sin;
431 struct ip6_mtuinfo mtu_info;
/* Claim the pending notification; the slot is left empty. */
436 skb = xchg(&np->rxpmtu, NULL);
/* NOTE(review): the length test guarding this flag is not visible here. */
442 msg->msg_flags |= MSG_TRUNC;
445 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
449 sock_recv_timestamp(msg, sk, skb);
/* Work on a local copy of the MTU record stored with the skb. */
451 memcpy(&mtu_info, IP6CBMTU(skb), sizeof(mtu_info));
/* msg_name gets the address and scope id from the MTU record. */
453 sin = (struct sockaddr_in6 *)msg->msg_name;
455 sin->sin6_family = AF_INET6;
456 sin->sin6_flowinfo = 0;
458 sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id;
459 ipv6_addr_copy(&sin->sin6_addr, &mtu_info.ip6m_addr.sin6_addr);
462 put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info);
/*
 * datagram_recv_ctl - attach IPv6 ancillary data for a received packet.
 * @sk:  receiving socket; its np->rxopt bits select what is reported
 * @msg: message header that receives the control messages via put_cmsg()
 * @skb: received packet; header fields and extension headers are read
 *       relative to its network header
 *
 * For every enabled receive option a SOL_IPV6 control message is appended:
 * packet info, hop limit, traffic class, flow info, hop-by-hop options,
 * destination options and routing headers, followed by the "old style"
 * IPV6_2292* variants.
 *
 * NOTE(review): lines are missing from this copy (braces, parts of the
 * extension-header loop, and the return statement).
 */
473 int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
475 struct ipv6_pinfo *np = inet6_sk(sk);
476 struct inet6_skb_parm *opt = IP6CB(skb);
477 unsigned char *nh = skb_network_header(skb);
/* IPV6_PKTINFO: incoming interface index and the packet's destination address. */
479 if (np->rxopt.bits.rxinfo) {
480 struct in6_pktinfo src_info;
482 src_info.ipi6_ifindex = opt->iif;
483 ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr);
484 put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
/* IPV6_HOPLIMIT: hop limit from the IPv6 header, widened to int. */
487 if (np->rxopt.bits.rxhlim) {
488 int hlim = ipv6_hdr(skb)->hop_limit;
489 put_cmsg(msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
/*
 * IPV6_TCLASS: eight bits taken from the first 32-bit word of the header
 * (host order, shifted right by 20).
 */
492 if (np->rxopt.bits.rxtclass) {
493 int tclass = (ntohl(*(__be32 *)ipv6_hdr(skb)) >> 20) & 0xff;
494 put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
/* IPV6_FLOWINFO: reported only when the masked flow info is non-zero. */
497 if (np->rxopt.bits.rxflow && (*(__be32 *)nh & IPV6_FLOWINFO_MASK)) {
498 __be32 flowinfo = *(__be32 *)nh & IPV6_FLOWINFO_MASK;
499 put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
502 /* HbH is allowed only once */
/* Header length is (second byte + 1) * 8 bytes. */
503 if (np->rxopt.bits.hopopts && opt->hop) {
504 u8 *ptr = nh + opt->hop;
505 put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr);
509 (np->rxopt.bits.dstopts || np->rxopt.bits.srcrt)) {
511 * Silly enough, but we need to reparse in order to
512 * report extension headers (except for HbH)
515 * Also note that IPV6_RECVRTHDRDSTOPTS is NOT
516 * (and WILL NOT be) defined because
517 * IPV6_RECVDSTOPTS is more generic. --yoshfuji
/*
 * Walk the extension headers, starting right after the fixed IPv6 header
 * and continuing while the offset does not exceed opt->lastopt.
 * Destination-options and routing headers are reported when the matching
 * option bit is set.
 * NOTE(review): the case labels for the last two length computations and
 * the statements advancing 'off' / 'nexthdr' are not visible in this copy.
 */
519 unsigned int off = sizeof(struct ipv6hdr);
520 u8 nexthdr = ipv6_hdr(skb)->nexthdr;
522 while (off <= opt->lastopt) {
527 case IPPROTO_DSTOPTS:
529 len = (ptr[1] + 1) << 3;
530 if (np->rxopt.bits.dstopts)
531 put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, len, ptr);
533 case IPPROTO_ROUTING:
535 len = (ptr[1] + 1) << 3;
536 if (np->rxopt.bits.srcrt)
537 put_cmsg(msg, SOL_IPV6, IPV6_RTHDR, len, ptr);
541 len = (ptr[1] + 2) << 2;
545 len = (ptr[1] + 1) << 3;
553 /* socket options in old style */
/* IPV6_2292PKTINFO: same payload as IPV6_PKTINFO above. */
554 if (np->rxopt.bits.rxoinfo) {
555 struct in6_pktinfo src_info;
557 src_info.ipi6_ifindex = opt->iif;
558 ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr);
559 put_cmsg(msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
/* IPV6_2292HOPLIMIT: same payload as IPV6_HOPLIMIT above. */
561 if (np->rxopt.bits.rxohlim) {
562 int hlim = ipv6_hdr(skb)->hop_limit;
563 put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
/*
 * The remaining old-style options use the header offsets recorded in the
 * skb control block (hop, dst0, srcrt, dst1) instead of re-parsing; each
 * is reported only when its offset is non-zero.
 */
565 if (np->rxopt.bits.ohopopts && opt->hop) {
566 u8 *ptr = nh + opt->hop;
567 put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr);
569 if (np->rxopt.bits.odstopts && opt->dst0) {
570 u8 *ptr = nh + opt->dst0;
571 put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
573 if (np->rxopt.bits.osrcrt && opt->srcrt) {
574 struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(nh + opt->srcrt);
575 put_cmsg(msg, SOL_IPV6, IPV6_2292RTHDR, (rthdr->hdrlen+1) << 3, rthdr);
/* Second destination-options header, reported under the same cmsg type as dst0. */
577 if (np->rxopt.bits.odstopts && opt->dst1) {
578 u8 *ptr = nh + opt->dst1;
579 put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
584 int datagram_send_ctl(struct net *net,
585 struct msghdr *msg, struct flowi *fl,
586 struct ipv6_txoptions *opt,
587 int *hlimit, int *tclass, int *dontfrag)
589 struct in6_pktinfo *src_info;
590 struct cmsghdr *cmsg;
591 struct ipv6_rt_hdr *rthdr;
592 struct ipv6_opt_hdr *hdr;
596 for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
599 if (!CMSG_OK(msg, cmsg)) {
604 if (cmsg->cmsg_level != SOL_IPV6)
607 switch (cmsg->cmsg_type) {
609 case IPV6_2292PKTINFO:
611 struct net_device *dev = NULL;
613 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) {
618 src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
620 if (src_info->ipi6_ifindex) {
621 if (fl->oif && src_info->ipi6_ifindex != fl->oif)
623 fl->oif = src_info->ipi6_ifindex;
626 addr_type = __ipv6_addr_type(&src_info->ipi6_addr);
630 dev = dev_get_by_index_rcu(net, fl->oif);
635 } else if (addr_type & IPV6_ADDR_LINKLOCAL) {
640 if (addr_type != IPV6_ADDR_ANY) {
641 int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
642 if (!ipv6_chk_addr(net, &src_info->ipi6_addr,
643 strict ? dev : NULL, 0))
646 ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr);
658 if (cmsg->cmsg_len < CMSG_LEN(4)) {
663 if (fl->fl6_flowlabel&IPV6_FLOWINFO_MASK) {
664 if ((fl->fl6_flowlabel^*(__be32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) {
669 fl->fl6_flowlabel = IPV6_FLOWINFO_MASK & *(__be32 *)CMSG_DATA(cmsg);
672 case IPV6_2292HOPOPTS:
674 if (opt->hopopt || cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
679 hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg);
680 len = ((hdr->hdrlen + 1) << 3);
681 if (cmsg->cmsg_len < CMSG_LEN(len)) {
685 if (!capable(CAP_NET_RAW)) {
689 opt->opt_nflen += len;
693 case IPV6_2292DSTOPTS:
694 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
699 hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg);
700 len = ((hdr->hdrlen + 1) << 3);
701 if (cmsg->cmsg_len < CMSG_LEN(len)) {
705 if (!capable(CAP_NET_RAW)) {
713 opt->opt_flen += len;
718 case IPV6_RTHDRDSTOPTS:
719 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
724 hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg);
725 len = ((hdr->hdrlen + 1) << 3);
726 if (cmsg->cmsg_len < CMSG_LEN(len)) {
730 if (!capable(CAP_NET_RAW)) {
734 if (cmsg->cmsg_type == IPV6_DSTOPTS) {
735 opt->opt_flen += len;
738 opt->opt_nflen += len;
745 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_rt_hdr))) {
750 rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg);
752 switch (rthdr->type) {
753 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
754 case IPV6_SRCRT_TYPE_2:
755 if (rthdr->hdrlen != 2 ||
756 rthdr->segments_left != 1) {
767 len = ((rthdr->hdrlen + 1) << 3);
769 if (cmsg->cmsg_len < CMSG_LEN(len)) {
774 /* segments left must also match */
775 if ((rthdr->hdrlen >> 1) != rthdr->segments_left) {
780 opt->opt_nflen += len;
783 if (cmsg->cmsg_type == IPV6_2292RTHDR && opt->dst1opt) {
784 int dsthdrlen = ((opt->dst1opt->hdrlen+1)<<3);
786 opt->opt_nflen += dsthdrlen;
787 opt->dst0opt = opt->dst1opt;
789 opt->opt_flen -= dsthdrlen;
794 case IPV6_2292HOPLIMIT:
796 if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
801 *hlimit = *(int *)CMSG_DATA(cmsg);
802 if (*hlimit < -1 || *hlimit > 0xff) {
814 if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
818 tc = *(int *)CMSG_DATA(cmsg);
819 if (tc < -1 || tc > 0xff)
833 if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
837 df = *(int *)CMSG_DATA(cmsg);
838 if (df < 0 || df > 1)
847 LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n",