2 * Internet Control Message Protocol (ICMPv6)
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * Based on net/ipv4/icmp.c
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
21 * Andi Kleen : exception handling
22 * Andi Kleen add rate limits. never reply to an icmp.
23 * add more length checks and other fixes.
24 * yoshfuji : ensure to send parameter problem for
26 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
28 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
29 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
32 #include <linux/module.h>
33 #include <linux/errno.h>
34 #include <linux/types.h>
35 #include <linux/socket.h>
37 #include <linux/kernel.h>
38 #include <linux/sockios.h>
39 #include <linux/net.h>
40 #include <linux/skbuff.h>
41 #include <linux/init.h>
42 #include <linux/netfilter.h>
45 #include <linux/sysctl.h>
48 #include <linux/inet.h>
49 #include <linux/netdevice.h>
50 #include <linux/icmpv6.h>
56 #include <net/ip6_checksum.h>
57 #include <net/protocol.h>
59 #include <net/rawv6.h>
60 #include <net/transp_v6.h>
61 #include <net/ip6_route.h>
62 #include <net/addrconf.h>
65 #include <net/inet_common.h>
67 #include <asm/uaccess.h>
68 #include <asm/system.h>
71 * The ICMP socket(s). This is the most convenient way to flow control
72 * our ICMP output as well as maintain a clean interface throughout
73 * all layers. All Socketless IP sends will soon be gone.
75 * On SMP we have one ICMP socket per-cpu.
77 static inline struct sock *icmpv6_sk(struct net *net)
/* Return this CPU's ICMPv6 control socket for the given netns.
 * One socket per CPU (see header comment above); caller must keep
 * preemption disabled while using it. */
79 return net->ipv6.icmp_sk[smp_processor_id()];
82 static int icmpv6_rcv(struct sk_buff *skb);
/* Protocol registration for IPPROTO_ICMPV6: all ICMPv6 input goes to
 * icmpv6_rcv(). NOPOLICY/FINAL flags: skip generic xfrm policy checks
 * (icmpv6_rcv does its own) and mark this as a terminal header. */
84 static const struct inet6_protocol icmpv6_protocol = {
85 .handler = icmpv6_rcv,
86 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
89 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
/* Acquire the per-CPU ICMPv6 socket's lock. A trylock is used (not
 * spin_lock) because the lock may already be held on this CPU via a
 * reentrant path -- see the comment below. On contention the function
 * presumably bails out and returns NULL; the failure path is not
 * visible in this chunk (NOTE(review): confirm against full source). */
96 if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
97 /* This can happen if the output path (f.e. SIT or
98 * ip6ip6 tunnel) signals dst_link_failure() for an
99 * outgoing ICMP6 packet.
107 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
/* Release the socket lock taken by icmpv6_xmit_lock() and re-enable
 * bottom halves. */
109 spin_unlock_bh(&sk->sk_lock.slock);
113 * Slightly more convenient version of icmpv6_send.
115 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
/* Convenience wrapper: send an ICMPV6_PARAMPROB error for @skb with the
 * given @code, pointing at offset @pos in the offending packet. */
117 icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
122 * Figure out, may we reply to this packet with icmp error.
124 * We do not reply, if:
125 * - it was icmp error message.
126 * - it is truncated, so that it is known, that protocol is ICMPV6
127 * (i.e. in the middle of some exthdr)
/*
 * Decide whether @skb must NOT be answered with an ICMPv6 error
 * (see the rules in the comment above): returns nonzero if the inner
 * packet is itself an ICMPv6 error (or too truncated to tell).
 */
132 static int is_ineligible(struct sk_buff *skb)
/* ptr: offset of the byte just past the fixed IPv6 header;
 * len: remaining payload from there. */
134 int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
135 int len = skb->len - ptr;
136 __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
/* Walk extension headers to find the upper-layer protocol. */
141 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr);
144 if (nexthdr == IPPROTO_ICMPV6) {
/* Peek at the inner ICMPv6 type field without linearizing. */
146 tp = skb_header_pointer(skb,
147 ptr+offsetof(struct icmp6hdr, icmp6_type),
148 sizeof(_type), &_type);
/* Error messages have the INFOMSG bit clear -> ineligible.
 * NOTE(review): the tp NULL-check branch is elided in this chunk. */
150 !(*tp & ICMPV6_INFOMSG_MASK))
157 * Check the ICMP output rate limit
/*
 * Rate-limit check for outgoing ICMPv6. Returns nonzero when sending
 * @type toward the flow @fl is allowed under the icmpv6_time sysctl.
 */
159 static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type,
162 struct dst_entry *dst;
163 struct net *net = sock_net(sk);
166 /* Informational messages are not limited. */
167 if (type & ICMPV6_INFOMSG_MASK)
170 /* Do not limit pmtu discovery, it would break it. */
171 if (type == ICMPV6_PKT_TOOBIG)
175 * Look up the output route.
176 * XXX: perhaps the expire for routing entries cloned by
177 * this lookup should be more aggressive (not longer than timeout).
179 dst = ip6_route_output(net, sk, fl);
/* Route lookup failure: count it, don't send. */
181 IP6_INC_STATS(net, ip6_dst_idev(dst),
182 IPSTATS_MIB_OUTNOROUTES);
/* Loopback destinations are presumably exempt from limiting --
 * the branch body is elided here (NOTE(review): confirm). */
183 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
186 struct rt6_info *rt = (struct rt6_info *)dst;
187 int tmo = net->ipv6.sysctl.icmpv6_time;
189 /* Give more bandwidth to wider prefixes. */
190 if (rt->rt6i_dst.plen < 128)
191 tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
/* Token-bucket decision against the (possibly shortened) timeout. */
193 res = xrlim_allow(dst, tmo);
200 * an inline helper for the "simple" if statement below
201 * checks if parameter problem report is caused by an
202 * unrecognized IPv6 option that has the Option Type
203 * highest-order two bits set to 10
206 static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
/* Translate the caller's offset (relative to the network header) into
 * an skb offset, then read the single option-type octet. */
210 offset += skb_network_offset(skb);
211 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
/* True when the option's highest-order two bits are 10 (0x80),
 * i.e. "discard and send ICMP Parameter Problem". */
214 return (*op & 0xC0) == 0x80;
/*
 * Fill in the ICMPv6 header of the queued message, compute the
 * checksum over the whole write queue, and transmit the frames.
 * @thdr is the template header built by the caller; @len is the
 * ICMPv6 payload length used for the pseudo-header checksum.
 */
217 static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
220 struct icmp6hdr *icmp6h;
/* Nothing queued: nothing to push. */
223 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
/* Copy the template into the reserved header space, with cksum
 * zeroed for computation. */
226 icmp6h = icmp6_hdr(skb);
227 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
228 icmp6h->icmp6_cksum = 0;
/* Single-skb fast path: fold the header into skb->csum directly. */
230 if (skb_queue_len(&sk->sk_write_queue) == 1) {
231 skb->csum = csum_partial(icmp6h,
232 sizeof(struct icmp6hdr), skb->csum);
233 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
/* Multi-skb path: accumulate per-fragment checksums, then add the
 * header and the IPv6 pseudo-header. */
240 skb_queue_walk(&sk->sk_write_queue, skb) {
241 tmp_csum = csum_add(tmp_csum, skb->csum);
244 tmp_csum = csum_partial(icmp6h,
245 sizeof(struct icmp6hdr), tmp_csum);
246 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
251 ip6_push_pending_frames(sk);
/*
 * ip6_append_data() fragment-copy callback: copy @len bytes of the
 * original (offending) packet into the outgoing ICMPv6 message,
 * accumulating the checksum as we go.
 */
262 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
264 struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
265 struct sk_buff *org_skb = msg->skb;
268 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
270 skb->csum = csum_block_add(skb->csum, csum, odd);
/* For error messages (INFOMSG bit clear), associate the conntrack
 * entry of the offending packet with the reply. */
271 if (!(msg->type & ICMPV6_INFOMSG_MASK))
272 nf_ct_attach(skb, org_skb);
276 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
/*
 * Mobile IPv6: if the offending packet carried a Home Address
 * destination option (HAO), swap the IPv6 source address with the
 * home address so the ICMPv6 error quotes the packet as the mobile
 * node originally built it.
 */
277 static void mip6_addr_swap(struct sk_buff *skb)
279 struct ipv6hdr *iph = ipv6_hdr(skb);
280 struct inet6_skb_parm *opt = IP6CB(skb);
281 struct ipv6_destopt_hao *hao;
/* Locate the HAO TLV inside the destination options header recorded
 * in the skb control block. */
286 off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
287 if (likely(off >= 0)) {
288 hao = (struct ipv6_destopt_hao *)
289 (skb_network_header(skb) + off);
/* Three-way swap via tmp: saddr <-> hao->addr. */
290 ipv6_addr_copy(&tmp, &iph->saddr);
291 ipv6_addr_copy(&iph->saddr, &hao->addr);
292 ipv6_addr_copy(&hao->addr, &tmp);
/* Without MIPv6 support the swap is a no-op. */
297 static inline void mip6_addr_swap(struct sk_buff *skb) {}
301 * Send an ICMP message in response to a packet in error
/*
 * Send an ICMPv6 error of @type/@code in response to the offending
 * packet @skb. @info becomes the pointer/MTU field of the header.
 * Enforces the RFC eligibility rules (no errors about errors, no
 * errors to multicast/unspecified sources, rate limiting) before
 * building and transmitting the message.
 */
303 void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
304 struct net_device *dev)
306 struct net *net = dev_net(skb->dev);
307 struct inet6_dev *idev = NULL;
308 struct ipv6hdr *hdr = ipv6_hdr(skb);
310 struct ipv6_pinfo *np;
311 struct in6_addr *saddr = NULL;
312 struct dst_entry *dst;
313 struct dst_entry *dst2;
314 struct icmp6hdr tmp_hdr;
317 struct icmpv6_msg msg;
/* Sanity: the quoted IPv6 header must lie fully inside the skb's
 * linear data. */
324 if ((u8 *)hdr < skb->head ||
325 (skb->network_header + sizeof(*hdr)) > skb->tail)
329 * Make sure we respect the rules
330 * i.e. RFC 1885 2.4(e)
331 * Rule (e.1) is enforced by not using icmpv6_send
332 * in any code that processes icmp errors.
334 addr_type = ipv6_addr_type(&hdr->daddr);
/* If the destination was one of our own addresses, we may reply
 * from it. */
336 if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0))
/* Multicast/non-host-addressed packets: only PKT_TOOBIG and the
 * "unrecognized option with 10 high bits" PARAMPROB are allowed
 * (RFC 2463 2.4(e.2)/(e.3)). */
343 if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
344 if (type != ICMPV6_PKT_TOOBIG &&
345 !(type == ICMPV6_PARAMPROB &&
346 code == ICMPV6_UNK_OPTION &&
347 (opt_unrec(skb, info))))
353 addr_type = ipv6_addr_type(&hdr->saddr);
/* Replies to link-local sources must go out the receiving iface. */
359 if (addr_type & IPV6_ADDR_LINKLOCAL)
360 iif = skb->dev->ifindex;
363 * Must not send error if the source does not uniquely
364 * identify a single node (RFC2463 Section 2.4).
365 * We check unspecified / multicast addresses here,
366 * and anycast addresses will be checked later.
368 if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
369 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
374 * Never answer to a ICMP packet.
376 if (is_ineligible(skb)) {
377 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
/* Build the flow toward the offender's source address. */
383 memset(&fl, 0, sizeof(fl));
384 fl.proto = IPPROTO_ICMPV6;
385 ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
387 ipv6_addr_copy(&fl.fl6_src, saddr);
389 fl.fl_icmp_type = type;
390 fl.fl_icmp_code = code;
391 security_skb_classify_flow(skb, &fl);
393 sk = icmpv6_xmit_lock(net);
/* Token-bucket rate limit (informational types exempt). */
398 if (!icmpv6_xrlim_allow(sk, type, &fl))
401 tmp_hdr.icmp6_type = type;
402 tmp_hdr.icmp6_code = code;
403 tmp_hdr.icmp6_cksum = 0;
404 tmp_hdr.icmp6_pointer = htonl(info);
406 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
407 fl.oif = np->mcast_oif;
409 err = ip6_dst_lookup(sk, &dst, &fl);
414 * We won't send icmp if the destination is known
/* ... anycast (RFC 2463 2.4: source must uniquely identify a node). */
417 if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
418 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
419 goto out_dst_release;
422 /* No need to clone since we're just using its address. */
425 err = xfrm_lookup(net, &dst, &fl, sk, 0);
/* xfrm relookup path: reverse-decode the offending packet's flow and
 * retry the lookup under XFRM_LOOKUP_ICMP; elided lines sit between
 * these steps (NOTE(review): confirm against full source). */
438 if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6))
439 goto relookup_failed;
441 if (ip6_dst_lookup(sk, &dst2, &fl2))
442 goto relookup_failed;
444 err = xfrm_lookup(net, &dst2, &fl2, sk, XFRM_LOOKUP_ICMP);
451 goto out_dst_release;
/* Choose the hop limit: socket multicast/unicast setting, falling
 * back to the route's default. */
460 if (ipv6_addr_is_multicast(&fl.fl6_dst))
461 hlimit = np->mcast_hops;
463 hlimit = np->hop_limit;
465 hlimit = ip6_dst_hoplimit(dst);
468 msg.offset = skb_network_offset(skb);
/* Quote as much of the offending packet as fits in the IPv6 minimum
 * MTU after our own headers (RFC 2463 requirement). */
471 len = skb->len - msg.offset;
472 len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
474 LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
475 goto out_dst_release;
478 idev = in6_dev_get(skb->dev);
/* Queue the message payload, then checksum + transmit; on append
 * failure the pending frames are flushed instead. */
480 err = ip6_append_data(sk, icmpv6_getfrag, &msg,
481 len + sizeof(struct icmp6hdr),
482 sizeof(struct icmp6hdr), hlimit,
483 np->tclass, NULL, &fl, (struct rt6_info*)dst,
486 ip6_flush_pending_frames(sk);
489 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
492 if (likely(idev != NULL))
497 icmpv6_xmit_unlock(sk);
500 EXPORT_SYMBOL(icmpv6_send);
/*
 * Answer an ICMPv6 Echo Request in @skb with an Echo Reply: reuse the
 * request's header (type rewritten), swap the addresses into the flow,
 * and transmit via the per-CPU ICMPv6 socket.
 */
502 static void icmpv6_echo_reply(struct sk_buff *skb)
504 struct net *net = dev_net(skb->dev);
506 struct inet6_dev *idev;
507 struct ipv6_pinfo *np;
508 struct in6_addr *saddr = NULL;
509 struct icmp6hdr *icmph = icmp6_hdr(skb);
510 struct icmp6hdr tmp_hdr;
512 struct icmpv6_msg msg;
513 struct dst_entry *dst;
/* Reply from the address the request was sent to. */
517 saddr = &ipv6_hdr(skb)->daddr;
/* Only answer requests addressed to us as unicast. */
519 if (!ipv6_unicast_destination(skb))
/* Copy the request header (preserves id/sequence), flip the type. */
522 memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
523 tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
525 memset(&fl, 0, sizeof(fl));
526 fl.proto = IPPROTO_ICMPV6;
527 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
529 ipv6_addr_copy(&fl.fl6_src, saddr);
530 fl.oif = skb->dev->ifindex;
531 fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
532 security_skb_classify_flow(skb, &fl);
534 sk = icmpv6_xmit_lock(net);
539 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
540 fl.oif = np->mcast_oif;
542 err = ip6_dst_lookup(sk, &dst, &fl);
545 if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0)
/* Hop limit: socket setting, else route default. */
548 if (ipv6_addr_is_multicast(&fl.fl6_dst))
549 hlimit = np->mcast_hops;
551 hlimit = np->hop_limit;
553 hlimit = ip6_dst_hoplimit(dst);
555 idev = in6_dev_get(skb->dev);
559 msg.type = ICMPV6_ECHO_REPLY;
/* Echo the request's entire payload back; flush on append failure,
 * otherwise checksum + send. */
561 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
562 sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl,
563 (struct rt6_info*)dst, MSG_DONTWAIT);
566 ip6_flush_pending_frames(sk);
569 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
572 if (likely(idev != NULL))
576 icmpv6_xmit_unlock(sk);
/*
 * Deliver a received ICMPv6 error to the upper-layer protocol of the
 * quoted (inner) packet: walk the inner extension headers, then call
 * the protocol's err_handler and notify raw sockets.
 */
579 static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
581 const struct inet6_protocol *ipprot;
/* Need at least the quoted IPv6 header. */
586 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
589 nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
590 if (ipv6_ext_hdr(nexthdr)) {
591 /* now skip over extension headers */
592 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
596 inner_offset = sizeof(struct ipv6hdr);
599 /* Checkin header including 8 bytes of inner protocol header. */
600 if (!pskb_may_pull(skb, inner_offset+8))
603 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
604 Without this we will not able f.e. to make source routed
606 Corresponding argument (opt) to notifiers is already added.
/* Dispatch by protocol number through the inet6_protos hash. */
610 hash = nexthdr & (MAX_INET_PROTOS - 1);
613 ipprot = rcu_dereference(inet6_protos[hash]);
614 if (ipprot && ipprot->err_handler)
615 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
/* Raw sockets bound to this protocol also get the error. */
618 raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
622 * Handle icmp messages
/*
 * Main ICMPv6 input handler (registered in icmpv6_protocol): verify
 * xfrm policy and checksum, then dispatch on the message type --
 * echo, PMTU discovery, error notification, NDISC, MLD, etc.
 */
625 static int icmpv6_rcv(struct sk_buff *skb)
627 struct net_device *dev = skb->dev;
628 struct inet6_dev *idev = __in6_dev_get(dev);
629 struct in6_addr *saddr, *daddr;
630 struct ipv6hdr *orig_hdr;
631 struct icmp6hdr *hdr;
/* Manual xfrm policy check (the protocol is NOPOLICY): require the
 * last transform to carry the right flags. */
634 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
635 struct sec_path *sp = skb_sec_path(skb);
638 if (!(sp && sp->xvec[sp->len - 1]->props.flags &
642 if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr)))
/* Temporarily point the network header past the ICMPv6 header so the
 * reverse policy check sees the inner packet, then restore it. */
645 nh = skb_network_offset(skb);
646 skb_set_network_header(skb, sizeof(*hdr));
648 if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
651 skb_set_network_header(skb, nh);
654 ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
656 saddr = &ipv6_hdr(skb)->saddr;
657 daddr = &ipv6_hdr(skb)->daddr;
659 /* Perform checksum. */
660 switch (skb->ip_summed) {
661 case CHECKSUM_COMPLETE:
/* Hardware-summed: just verify against the pseudo-header. */
662 if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
/* Otherwise compute in software and verify. */
667 skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
669 if (__skb_checksum_complete(skb)) {
670 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n",
/* Strip the ICMPv6 header and dispatch on type. */
676 if (!pskb_pull(skb, sizeof(*hdr)))
679 hdr = icmp6_hdr(skb);
681 type = hdr->icmp6_type;
683 ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
686 case ICMPV6_ECHO_REQUEST:
687 icmpv6_echo_reply(skb);
690 case ICMPV6_ECHO_REPLY:
691 /* we couldn't care less */
694 case ICMPV6_PKT_TOOBIG:
695 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
696 standard destination cache. Seems, only "advanced"
697 destination cache will allow to solve this problem
/* Need the quoted IPv6 header to learn the path MTU. */
700 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
702 hdr = icmp6_hdr(skb);
703 orig_hdr = (struct ipv6hdr *) (hdr + 1);
704 rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
705 ntohl(hdr->icmp6_mtu));
708 * Drop through to notify
711 case ICMPV6_DEST_UNREACH:
712 case ICMPV6_TIME_EXCEED:
713 case ICMPV6_PARAMPROB:
714 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
/* NDISC types: handled elsewhere (handler call elided here). */
717 case NDISC_ROUTER_SOLICITATION:
718 case NDISC_ROUTER_ADVERTISEMENT:
719 case NDISC_NEIGHBOUR_SOLICITATION:
720 case NDISC_NEIGHBOUR_ADVERTISEMENT:
725 case ICMPV6_MGM_QUERY:
726 igmp6_event_query(skb);
729 case ICMPV6_MGM_REPORT:
730 igmp6_event_report(skb);
/* Known-but-ignored and MIPv6 types fall through to the default
 * handling below. */
733 case ICMPV6_MGM_REDUCTION:
734 case ICMPV6_NI_QUERY:
735 case ICMPV6_NI_REPLY:
736 case ICMPV6_MLD2_REPORT:
737 case ICMPV6_DHAAD_REQUEST:
738 case ICMPV6_DHAAD_REPLY:
739 case ICMPV6_MOBILE_PREFIX_SOL:
740 case ICMPV6_MOBILE_PREFIX_ADV:
744 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
/* Unknown informational types are silently dropped (RFC 4443 2.4);
 * unknown error types still go to the upper layer. */
747 if (type & ICMPV6_INFOMSG_MASK)
751 * error of unknown type.
752 * must pass to upper level
755 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
762 ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
/*
 * Initialize a flowi for an ICMPv6 transmission of @type from @saddr
 * to @daddr, and classify it for LSM/security purposes.
 */
768 void icmpv6_flow_init(struct sock *sk, struct flowi *fl,
770 const struct in6_addr *saddr,
771 const struct in6_addr *daddr,
774 memset(fl, 0, sizeof(*fl));
775 ipv6_addr_copy(&fl->fl6_src, saddr);
776 ipv6_addr_copy(&fl->fl6_dst, daddr);
777 fl->proto = IPPROTO_ICMPV6;
778 fl->fl_icmp_type = type;
779 fl->fl_icmp_code = 0;
781 security_sk_classify_flow(sk, fl);
785 * Special lock-class for __icmpv6_sk:
787 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
/*
 * Per-netns init: allocate the icmp_sk array and create one kernel
 * ICMPv6 raw socket per possible CPU. On any failure, tear down the
 * sockets created so far and free the array.
 */
789 static int __net_init icmpv6_sk_init(struct net *net)
/* Array sized by nr_cpu_ids, indexed by CPU id in icmpv6_sk(). */
795 kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
796 if (net->ipv6.icmp_sk == NULL)
799 for_each_possible_cpu(i) {
800 err = inet_ctl_sock_create(&sk, PF_INET6,
801 SOCK_RAW, IPPROTO_ICMPV6, net);
804 "Failed to initialize the ICMP6 control socket "
810 net->ipv6.icmp_sk[i] = sk;
813 * Split off their lock-class, because sk->sk_dst_lock
814 * gets used from softirqs, which is safe for
815 * __icmpv6_sk (because those never get directly used
816 * via userspace syscalls), but unsafe for normal sockets.
818 lockdep_set_class(&sk->sk_dst_lock,
819 &icmpv6_socket_sk_dst_lock_key);
821 /* Enough space for 2 64K ICMP packets, including
822 * sk_buff struct overhead.
825 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
/* Error unwind: destroy only the sockets successfully created. */
830 for (j = 0; j < i; j++)
831 inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
832 kfree(net->ipv6.icmp_sk);
/*
 * Per-netns teardown: destroy every per-CPU control socket and free
 * the array allocated in icmpv6_sk_init().
 */
836 static void __net_exit icmpv6_sk_exit(struct net *net)
840 for_each_possible_cpu(i) {
841 inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
843 kfree(net->ipv6.icmp_sk);
/* Netns lifecycle hooks for the per-CPU ICMPv6 control sockets. */
846 static struct pernet_operations icmpv6_sk_ops = {
847 .init = icmpv6_sk_init,
848 .exit = icmpv6_sk_exit,
/*
 * Module init: register the per-netns socket setup, then hook
 * IPPROTO_ICMPV6 into the inet6 protocol table. Unregisters the
 * pernet subsystem again if protocol registration fails.
 */
851 int __init icmpv6_init(void)
855 err = register_pernet_subsys(&icmpv6_sk_ops);
860 if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
865 printk(KERN_ERR "Failed to register ICMP6 protocol\n");
866 unregister_pernet_subsys(&icmpv6_sk_ops);
/* Module teardown: reverse of icmpv6_init(). */
870 void icmpv6_cleanup(void)
872 unregister_pernet_subsys(&icmpv6_sk_ops);
873 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
/* Mapping table from ICMPV6_DEST_UNREACH codes to errno + fatal flag,
 * indexed by code in icmpv6_err_convert() below. */
877 static const struct icmp6_err {
885 { /* ADM_PROHIBITED */
889 { /* Was NOT_NEIGHBOUR, now reserved */
/*
 * Translate an ICMPv6 error (type, code) into an errno for socket
 * callers, storing it in *err. Returns whether the error is fatal
 * for the connection.
 */
903 int icmpv6_err_convert(u8 type, u8 code, int *err)
910 case ICMPV6_DEST_UNREACH:
/* Codes beyond PORT_UNREACH are not in tab_unreach -- keep default. */
912 if (code <= ICMPV6_PORT_UNREACH) {
913 *err = tab_unreach[code].err;
914 fatal = tab_unreach[code].fatal;
918 case ICMPV6_PKT_TOOBIG:
922 case ICMPV6_PARAMPROB:
927 case ICMPV6_TIME_EXCEED:
935 EXPORT_SYMBOL(icmpv6_err_convert);
/* sysctl template for net.ipv6.icmp.ratelimit; the .data pointer is
 * re-targeted per-netns in ipv6_icmp_sysctl_init(). Value is stored
 * in jiffies, exposed as milliseconds. */
938 ctl_table ipv6_icmp_table_template[] = {
940 .procname = "ratelimit",
941 .data = &init_net.ipv6.sysctl.icmpv6_time,
942 .maxlen = sizeof(int),
944 .proc_handler = proc_dointvec_ms_jiffies,
949 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
951 struct ctl_table *table;
953 table = kmemdup(ipv6_icmp_table_template,
954 sizeof(ipv6_icmp_table_template),
958 table[0].data = &net->ipv6.sysctl.icmpv6_time;