X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fipv4%2Ficmp.c;h=3f50807237e08580d3427d687f9cf82f4da9ce59;hb=4fcd39207f4c91185cc89e3e6a28cbb643034ff1;hp=cee77d606fbe54bf357bd8e909a2e4280a9607d0;hpb=1d1c8d13c4f7690d382bca5de2f9dc88f22a4aab;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index cee77d6..3f50807 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1,9 +1,7 @@ /* * NET3: Implementation of the ICMP protocol layer. * - * Alan Cox, - * - * Version: $Id: icmp.c,v 1.85 2002/02/01 22:01:03 davem Exp $ + * Alan Cox, * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -93,6 +91,7 @@ #include #include #include +#include /* * Build xmit assembly blocks @@ -112,12 +111,6 @@ struct icmp_bxm { unsigned char optbuf[40]; }; -/* - * Statistics - */ -DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics) __read_mostly; -DEFINE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics) __read_mostly; - /* An array of errno for error messages from dest unreach. */ /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */ @@ -188,29 +181,6 @@ struct icmp_err icmp_err_convert[] = { }, }; -/* Control parameters for ECHO replies. */ -int sysctl_icmp_echo_ignore_all __read_mostly; -int sysctl_icmp_echo_ignore_broadcasts __read_mostly = 1; - -/* Control parameter - ignore bogus broadcast responses? */ -int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1; - -/* - * Configurable global rate limit. - * - * ratelimit defines tokens/packet consumed for dst->rate_token bucket - * ratemask defines which icmp types are ratelimited by setting - * it's bit position. - * - * default: - * dest unreachable (3), source quench (4), - * time exceeded (11), parameter problem (12) - */ - -int sysctl_icmp_ratelimit __read_mostly = 1 * HZ; -int sysctl_icmp_ratemask __read_mostly = 0x1818; -int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly; - /* * ICMP control array. This specifies what to do with each ICMP. */ @@ -234,18 +204,22 @@ static struct sock *icmp_sk(struct net *net) return net->ipv4.icmp_sk[smp_processor_id()]; } -static inline int icmp_xmit_lock(struct sock *sk) +static inline struct sock *icmp_xmit_lock(struct net *net) { + struct sock *sk; + local_bh_disable(); + sk = icmp_sk(net); + if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { /* This can happen if the output path signals a * dst_link_failure() for an outgoing ICMP packet. */ local_bh_enable(); - return 1; + return NULL; } - return 0; + return sk; } static inline void icmp_xmit_unlock(struct sock *sk) @@ -293,7 +267,8 @@ int xrlim_allow(struct dst_entry *dst, int timeout) return rc; } -static inline int icmpv4_xrlim_allow(struct rtable *rt, int type, int code) +static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, + int type, int code) { struct dst_entry *dst = &rt->u.dst; int rc = 1; @@ -310,8 +285,8 @@ static inline int icmpv4_xrlim_allow(struct rtable *rt, int type, int code) goto out; /* Limit if icmp type is enabled in ratemask. */ - if ((1 << type) & sysctl_icmp_ratemask) - rc = xrlim_allow(dst, sysctl_icmp_ratelimit); + if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) + rc = xrlim_allow(dst, net->ipv4.sysctl_icmp_ratelimit); out: return rc; } @@ -319,10 +294,10 @@ out: /* * Maintain the counters used in the SNMP statistics for outgoing ICMP */ -void icmp_out_count(unsigned char type) +void icmp_out_count(struct net *net, unsigned char type) { - ICMPMSGOUT_INC_STATS(type); - ICMP_INC_STATS(ICMP_MIB_OUTMSGS); + ICMPMSGOUT_INC_STATS(net, type); + ICMP_INC_STATS(net, ICMP_MIB_OUTMSGS); } /* @@ -346,12 +321,12 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd, } static void icmp_push_reply(struct icmp_bxm *icmp_param, - struct ipcm_cookie *ipc, struct rtable *rt) + struct ipcm_cookie *ipc, struct rtable **rt) { struct sock *sk; struct sk_buff *skb; - sk = icmp_sk(rt->u.dst.dev->nd_net); + sk = icmp_sk(dev_net((*rt)->u.dst.dev)); if (ip_append_data(sk, icmp_glue_bits, icmp_param, icmp_param->data_len+icmp_param->head_len, icmp_param->head_len, @@ -381,23 +356,26 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) { struct ipcm_cookie ipc; - struct rtable *rt = (struct rtable *)skb->dst; - struct net *net = rt->u.dst.dev->nd_net; - struct sock *sk = icmp_sk(net); - struct inet_sock *inet = inet_sk(sk); + struct rtable *rt = skb->rtable; + struct net *net = dev_net(rt->u.dst.dev); + struct sock *sk; + struct inet_sock *inet; __be32 daddr; if (ip_options_echo(&icmp_param->replyopts, skb)) return; - if (icmp_xmit_lock(sk)) + sk = icmp_xmit_lock(net); + if (sk == NULL) return; + inet = inet_sk(sk); icmp_param->data.icmph.checksum = 0; inet->tos = ip_hdr(skb)->tos; daddr = ipc.addr = rt->rt_src; ipc.opt = NULL; + ipc.shtx.flags = 0; if (icmp_param->replyopts.optlen) { ipc.opt = &icmp_param->replyopts; if (ipc.opt->srr) @@ -413,9 +391,9 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) if (ip_route_output_key(net, &rt, &fl)) goto out_unlock; } - if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type, + if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type, icmp_param->data.icmph.code)) - icmp_push_reply(icmp_param, &ipc, rt); + icmp_push_reply(icmp_param, &ipc, &rt); ip_rt_put(rt); out_unlock: icmp_xmit_unlock(sk); @@ -438,7 +416,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) struct iphdr *iph; int room; struct icmp_bxm icmp_param; - struct rtable *rt = (struct rtable *)skb_in->dst; + struct rtable *rt = skb_in->rtable; struct ipcm_cookie ipc; __be32 saddr; u8 tos; @@ -447,8 +425,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) if (!rt) goto out; - net = rt->u.dst.dev->nd_net; - sk = icmp_sk(net); + net = dev_net(rt->u.dst.dev); /* * Find the original header. It is expected to be valid, of course. @@ -512,7 +489,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) } } - if (icmp_xmit_lock(sk)) + sk = icmp_xmit_lock(net); + if (sk == NULL) return; /* @@ -523,7 +501,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) if (!(rt->rt_flags & RTCF_LOCAL)) { struct net_device *dev = NULL; - if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr) + if (rt->fl.iif && + net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) dev = dev_get_by_index(net, rt->fl.iif); if (dev) { @@ -554,6 +533,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) inet_sk(sk)->tos = tos; ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts; + ipc.shtx.flags = 0; { struct flowi fl = { @@ -584,7 +564,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) /* No need to clone since we're just using its address. */ rt2 = rt; - err = xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); + err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0); switch (err) { case 0: if (rt != rt2) @@ -598,7 +578,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) } if (xfrm_decode_session_reverse(skb_in, &fl, AF_INET)) - goto out_unlock; + goto relookup_failed; if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL) err = __ip_route_output_key(net, &rt2, &fl); @@ -608,7 +588,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) fl2.fl4_dst = fl.fl4_src; if (ip_route_output_key(net, &rt2, &fl2)) - goto out_unlock; + goto relookup_failed; /* Ugh! */ odst = skb_in->dst; @@ -616,30 +596,32 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) RT_TOS(tos), rt2->u.dst.dev); dst_release(&rt2->u.dst); - rt2 = (struct rtable *)skb_in->dst; + rt2 = skb_in->rtable; skb_in->dst = odst; } if (err) - goto out_unlock; + goto relookup_failed; - err = xfrm_lookup((struct dst_entry **)&rt2, &fl, NULL, + err = xfrm_lookup(net, (struct dst_entry **)&rt2, &fl, NULL, XFRM_LOOKUP_ICMP); - if (err == -ENOENT) { + switch (err) { + case 0: + dst_release(&rt->u.dst); + rt = rt2; + break; + case -EPERM: + goto ende; + default: +relookup_failed: if (!rt) goto out_unlock; - goto route_done; + break; } - - dst_release(&rt->u.dst); - rt = rt2; - - if (err) - goto out_unlock; } route_done: - if (!icmpv4_xrlim_allow(rt, type, code)) + if (!icmpv4_xrlim_allow(net, rt, type, code)) goto ende; /* RFC says return as much as we can without exceeding 576 bytes. */ @@ -655,7 +637,7 @@ route_done: icmp_param.data_len = room; icmp_param.head_len = sizeof(struct icmphdr); - icmp_push_reply(&icmp_param, &ipc, rt); + icmp_push_reply(&icmp_param, &ipc, &rt); ende: ip_rt_put(rt); out_unlock: @@ -677,7 +659,7 @@ static void icmp_unreach(struct sk_buff *skb) u32 info = 0; struct net *net; - net = skb->dst->dev->nd_net; + net = dev_net(skb->dst->dev); /* * Incomplete header ? @@ -703,21 +685,19 @@ static void icmp_unreach(struct sk_buff *skb) break; case ICMP_FRAG_NEEDED: if (ipv4_config.no_pmtu_disc) { - LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: " - "fragmentation needed " - "and DF set.\n", - NIPQUAD(iph->daddr)); + LIMIT_NETDEBUG(KERN_INFO "ICMP: %pI4: fragmentation needed and DF set.\n", + &iph->daddr); } else { info = ip_rt_frag_needed(net, iph, - ntohs(icmph->un.frag.mtu)); + ntohs(icmph->un.frag.mtu), + skb->dev); if (!info) goto out; } break; case ICMP_SR_FAILED: - LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: Source " - "Route Failed.\n", - NIPQUAD(iph->daddr)); + LIMIT_NETDEBUG(KERN_INFO "ICMP: %pI4: Source Route Failed.\n", + &iph->daddr); break; default: break; @@ -745,15 +725,15 @@ static void icmp_unreach(struct sk_buff *skb) * get the other vendor to fix their kit. */ - if (!sysctl_icmp_ignore_bogus_error_responses && + if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses && inet_addr_type(net, iph->daddr) == RTN_BROADCAST) { if (net_ratelimit()) - printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP " + printk(KERN_WARNING "%pI4 sent an invalid ICMP " "type %u, code %u " - "error to a broadcast: %u.%u.%u.%u on %s\n", - NIPQUAD(ip_hdr(skb)->saddr), + "error to a broadcast: %pI4 on %s\n", + &ip_hdr(skb)->saddr, icmph->type, icmph->code, - NIPQUAD(iph->daddr), + &iph->daddr, skb->dev->name); goto out; } @@ -782,7 +762,7 @@ static void icmp_unreach(struct sk_buff *skb) out: return; out_err: - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); goto out; } @@ -822,7 +802,7 @@ static void icmp_redirect(struct sk_buff *skb) out: return; out_err: - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS); goto out; } @@ -840,7 +820,10 @@ out_err: static void icmp_echo(struct sk_buff *skb) { - if (!sysctl_icmp_echo_ignore_all) { + struct net *net; + + net = dev_net(skb->dst->dev); + if (!net->ipv4.sysctl_icmp_echo_ignore_all) { struct icmp_bxm icmp_param; icmp_param.data.icmph = *icmp_hdr(skb); @@ -862,7 +845,7 @@ static void icmp_echo(struct sk_buff *skb) */ static void icmp_timestamp(struct sk_buff *skb) { - struct timeval tv; + struct timespec tv; struct icmp_bxm icmp_param; /* * Too short. @@ -873,9 +856,9 @@ static void icmp_timestamp(struct sk_buff *skb) /* * Fill in the current time as ms since midnight UT: */ - do_gettimeofday(&tv); - icmp_param.data.times[1] = htonl((tv.tv_sec % 86400) * 1000 + - tv.tv_usec / 1000); + getnstimeofday(&tv); + icmp_param.data.times[1] = htonl((tv.tv_sec % 86400) * MSEC_PER_SEC + + tv.tv_nsec / NSEC_PER_MSEC); icmp_param.data.times[2] = icmp_param.data.times[1]; if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4)) BUG(); @@ -890,7 +873,7 @@ static void icmp_timestamp(struct sk_buff *skb) out: return; out_err: - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(dev_net(skb->dst->dev), ICMP_MIB_INERRORS); goto out; } @@ -943,7 +926,7 @@ static void icmp_address(struct sk_buff *skb) static void icmp_address_reply(struct sk_buff *skb) { - struct rtable *rt = (struct rtable *)skb->dst; + struct rtable *rt = skb->rtable; struct net_device *dev = skb->dev; struct in_device *in_dev; struct in_ifaddr *ifa; @@ -968,9 +951,8 @@ static void icmp_address_reply(struct sk_buff *skb) break; } if (!ifa && net_ratelimit()) { - printk(KERN_INFO "Wrong address mask %u.%u.%u.%u from " - "%s/%u.%u.%u.%u\n", - NIPQUAD(*mp), dev->name, NIPQUAD(rt->rt_src)); + printk(KERN_INFO "Wrong address mask %pI4 from %s/%pI4\n", + mp, dev->name, &rt->rt_src); } } rcu_read_unlock(); @@ -988,12 +970,14 @@ static void icmp_discard(struct sk_buff *skb) int icmp_rcv(struct sk_buff *skb) { struct icmphdr *icmph; - struct rtable *rt = (struct rtable *)skb->dst; + struct rtable *rt = skb->rtable; + struct net *net = dev_net(rt->u.dst.dev); if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { + struct sec_path *sp = skb_sec_path(skb); int nh; - if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags & + if (!(sp && sp->xvec[sp->len - 1]->props.flags & XFRM_STATE_ICMP)) goto drop; @@ -1009,7 +993,7 @@ int icmp_rcv(struct sk_buff *skb) skb_set_network_header(skb, nh); } - ICMP_INC_STATS_BH(ICMP_MIB_INMSGS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS); switch (skb->ip_summed) { case CHECKSUM_COMPLETE: @@ -1027,7 +1011,7 @@ int icmp_rcv(struct sk_buff *skb) icmph = icmp_hdr(skb); - ICMPMSGIN_INC_STATS_BH(icmph->type); + ICMPMSGIN_INC_STATS_BH(net, icmph->type); /* * 18 is the highest 'known' ICMP type. Anything else is a mystery * @@ -1051,7 +1035,7 @@ int icmp_rcv(struct sk_buff *skb) */ if ((icmph->type == ICMP_ECHO || icmph->type == ICMP_TIMESTAMP) && - sysctl_icmp_echo_ignore_broadcasts) { + net->ipv4.sysctl_icmp_echo_ignore_broadcasts) { goto error; } if (icmph->type != ICMP_ECHO && @@ -1068,7 +1052,7 @@ drop: kfree_skb(skb); return 0; error: - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); goto drop; } @@ -1151,12 +1135,12 @@ static void __net_exit icmp_sk_exit(struct net *net) int i; for_each_possible_cpu(i) - sk_release_kernel(net->ipv4.icmp_sk[i]); + inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]); kfree(net->ipv4.icmp_sk); net->ipv4.icmp_sk = NULL; } -int __net_init icmp_sk_init(struct net *net) +static int __net_init icmp_sk_init(struct net *net) { int i, err; @@ -1167,17 +1151,13 @@ int __net_init icmp_sk_init(struct net *net) for_each_possible_cpu(i) { struct sock *sk; - struct socket *sock; - struct inet_sock *inet; - err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, &sock); + err = inet_ctl_sock_create(&sk, PF_INET, + SOCK_RAW, IPPROTO_ICMP, net); if (err < 0) goto fail; - net->ipv4.icmp_sk[i] = sk = sock->sk; - sk_change_net(sk, net); - - sk->sk_allocation = GFP_ATOMIC; + net->ipv4.icmp_sk[i] = sk; /* Enough space for 2 64K ICMP packets, including * sk_buff struct overhead. @@ -1185,21 +1165,37 @@ int __net_init icmp_sk_init(struct net *net) sk->sk_sndbuf = (2 * ((64 * 1024) + sizeof(struct sk_buff))); - inet = inet_sk(sk); - inet->uc_ttl = -1; - inet->pmtudisc = IP_PMTUDISC_DONT; - - /* Unhash it so that IP input processing does not even - * see it, we do not wish this socket to see incoming - * packets. - */ - sk->sk_prot->unhash(sk); + inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT; } + + /* Control parameters for ECHO replies. */ + net->ipv4.sysctl_icmp_echo_ignore_all = 0; + net->ipv4.sysctl_icmp_echo_ignore_broadcasts = 1; + + /* Control parameter - ignore bogus broadcast responses? */ + net->ipv4.sysctl_icmp_ignore_bogus_error_responses = 1; + + /* + * Configurable global rate limit. + * + * ratelimit defines tokens/packet consumed for dst->rate_token + * bucket ratemask defines which icmp types are ratelimited by + * setting it's bit position. + * + * default: + * dest unreachable (3), source quench (4), + * time exceeded (11), parameter problem (12) + */ + + net->ipv4.sysctl_icmp_ratelimit = 1 * HZ; + net->ipv4.sysctl_icmp_ratemask = 0x1818; + net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0; + return 0; fail: for_each_possible_cpu(i) - sk_release_kernel(net->ipv4.icmp_sk[i]); + inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]); kfree(net->ipv4.icmp_sk); return err; } @@ -1211,10 +1207,9 @@ static struct pernet_operations __net_initdata icmp_sk_ops = { int __init icmp_init(void) { - return register_pernet_device(&icmp_sk_ops); + return register_pernet_subsys(&icmp_sk_ops); } EXPORT_SYMBOL(icmp_err_convert); EXPORT_SYMBOL(icmp_send); -EXPORT_SYMBOL(icmp_statistics); EXPORT_SYMBOL(xrlim_allow);