X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fipv4%2Froute.c;h=56d6602affb445ba5f79d88adb0c5a1230d88a14;hb=e905a9edab7f4f14f9213b52234e4a346c690911;hp=c42ed803c3eb6ee56c1c6e4d7654fa869327b120;hpb=e448515c79c3785eae225c25e8cd5b90b470d0a6;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c42ed80..56d6602 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -20,7 +20,7 @@ * (rco@di.uminho.pt) Routing table insertion and update * Linus Torvalds : Rewrote bits to be sensible * Alan Cox : Added BSD route gw semantics - * Alan Cox : Super /proc >4K + * Alan Cox : Super /proc >4K * Alan Cox : MTU in route table * Alan Cox : MSS actually. Also added the window * clamper. @@ -38,7 +38,7 @@ * Alan Cox : Faster /proc handling * Alexey Kuznetsov : Massive rework to support tree based routing, * routing caches and better behaviour. - * + * * Olaf Erb : irtt wasn't being copied right. * Bjorn Ekwall : Kerneld route support. * Alan Cox : Multicast fixed (I hope) @@ -361,8 +361,8 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) dev_queue_xmit) : 0, r->rt_spec_dst); seq_printf(seq, "%-127s\n", temp); - } - return 0; + } + return 0; } static struct seq_operations rt_cache_seq_ops = { @@ -429,7 +429,7 @@ static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) return &per_cpu(rt_cache_stat, cpu); } return NULL; - + } static void rt_cpu_seq_stop(struct seq_file *seq, void *v) @@ -445,7 +445,7 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n"); return 0; } - + seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x " " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n", atomic_read(&ipv4_dst_ops.entries), @@ -459,7 +459,7 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v) st->out_hit, st->out_slow_tot, - st->out_slow_mc, + st->out_slow_mc, st->gc_total, st->gc_ignored, @@ -493,7 +493,7 @@ static struct file_operations rt_cpu_seq_fops = { }; #endif /* CONFIG_PROC_FS */ - + static __inline__ void rt_free(struct rtable *rt) { multipath_remove(rt); @@ -566,9 +566,13 @@ static inline u32 rt_score(struct rtable *rt) static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { - return memcmp(&fl1->nl_u.ip4_u, &fl2->nl_u.ip4_u, sizeof(fl1->nl_u.ip4_u)) == 0 && - fl1->oif == fl2->oif && - fl1->iif == fl2->iif; + return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | + (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) | + (fl1->mark ^ fl2->mark) | + (*(u16 *)&fl1->nl_u.ip4_u.tos ^ + *(u16 *)&fl2->nl_u.ip4_u.tos) | + (fl1->oif ^ fl2->oif) | + (fl1->iif ^ fl2->iif)) == 0; } #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED @@ -668,8 +672,8 @@ static void rt_check_expire(unsigned long dummy) rt_free(rth); } #else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ - *rthp = rth->u.rt_next; - rt_free(rth); + *rthp = rth->u.rt_next; + rt_free(rth); #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ } spin_unlock(rt_hash_lock_addr(i)); @@ -735,7 +739,7 @@ void rt_cache_flush(int delay) if (user_mode && tmo < ip_rt_max_delay-ip_rt_min_delay) tmo = 0; - + if (delay > tmo) delay = tmo; } @@ -1100,7 +1104,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) return; } } else - printk(KERN_DEBUG "rt_bind_peer(0) @%p\n", + printk(KERN_DEBUG "rt_bind_peer(0) @%p\n", __builtin_return_address(0)); ip_select_fb_ident(iph); @@ -1186,7 +1190,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, /* Copy all the information. */ *rt = *rth; - INIT_RCU_HEAD(&rt->u.dst.rcu_head); + INIT_RCU_HEAD(&rt->u.dst.rcu_head); rt->u.dst.__use = 1; atomic_set(&rt->u.dst.__refcnt, 1); rt->u.dst.child = NULL; @@ -1221,11 +1225,11 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rt_drop(rt); goto do_next; } - + netevent.old = &rth->u.dst; netevent.new = &rt->u.dst; - call_netevent_notifiers(NETEVENT_REDIRECT, - &netevent); + call_netevent_notifiers(NETEVENT_REDIRECT, + &netevent); rt_del(hash, rth); if (!rt_intern_hash(hash, rt, &rt)) @@ -1321,7 +1325,8 @@ void ip_rt_send_redirect(struct sk_buff *skb) /* Check for load limit; set rate_last to the latest sent * redirect. */ - if (time_after(jiffies, + if (rt->u.dst.rate_tokens == 0 || + time_after(jiffies, (rt->u.dst.rate_last + (ip_rt_redirect_load << rt->u.dst.rate_tokens)))) { icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); @@ -1338,7 +1343,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) #endif } out: - in_dev_put(in_dev); + in_dev_put(in_dev); } static int ip_error(struct sk_buff *skb) @@ -1374,7 +1379,7 @@ static int ip_error(struct sk_buff *skb) out: kfree_skb(skb); return 0; -} +} /* * The last two values are not from the RFC but @@ -1387,7 +1392,7 @@ static const unsigned short mtu_plateau[] = static __inline__ unsigned short guess_mtu(unsigned short old_mtu) { int i; - + for (i = 0; i < ARRAY_SIZE(mtu_plateau); i++) if (old_mtu > mtu_plateau[i]) return mtu_plateau[i]; @@ -1431,7 +1436,7 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu) mtu = guess_mtu(old_mtu); } if (mtu <= rth->u.dst.metrics[RTAX_MTU-1]) { - if (mtu < rth->u.dst.metrics[RTAX_MTU-1]) { + if (mtu < rth->u.dst.metrics[RTAX_MTU-1]) { dst_confirm(&rth->u.dst); if (mtu < ip_rt_min_pmtu) { mtu = ip_rt_min_pmtu; @@ -1595,7 +1600,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) #endif set_class_tag(rt, itag); #endif - rt->rt_type = res->type; + rt->rt_type = res->type; } static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, @@ -1637,9 +1642,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; -#ifdef CONFIG_IP_ROUTE_FWMARK - rth->fl.fl4_fwmark= skb->nfmark; -#endif + rth->fl.mark = skb->mark; rth->fl.fl4_src = saddr; rth->rt_src = saddr; #ifdef CONFIG_NET_CLS_ROUTE @@ -1711,11 +1714,11 @@ static void ip_handle_martian_source(struct net_device *dev, #endif } -static inline int __mkroute_input(struct sk_buff *skb, - struct fib_result* res, - struct in_device *in_dev, +static inline int __mkroute_input(struct sk_buff *skb, + struct fib_result* res, + struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos, - struct rtable **result) + struct rtable **result) { struct rtable *rth; @@ -1735,12 +1738,12 @@ static inline int __mkroute_input(struct sk_buff *skb, } - err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res), + err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res), in_dev->dev, &spec_dst, &itag); if (err < 0) { - ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, + ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, saddr); - + err = -EINVAL; goto cleanup; } @@ -1778,14 +1781,12 @@ static inline int __mkroute_input(struct sk_buff *skb, #endif if (in_dev->cnf.no_policy) rth->u.dst.flags |= DST_NOPOLICY; - if (in_dev->cnf.no_xfrm) + if (out_dev->cnf.no_xfrm) rth->u.dst.flags |= DST_NOXFRM; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; -#ifdef CONFIG_IP_ROUTE_FWMARK - rth->fl.fl4_fwmark= skb->nfmark; -#endif + rth->fl.mark = skb->mark; rth->fl.fl4_src = saddr; rth->rt_src = saddr; rth->rt_gateway = daddr; @@ -1810,10 +1811,10 @@ static inline int __mkroute_input(struct sk_buff *skb, /* release the working reference to the output device */ in_dev_put(out_dev); return err; -} +} -static inline int ip_mkroute_input_def(struct sk_buff *skb, - struct fib_result* res, +static inline int ip_mkroute_input_def(struct sk_buff *skb, + struct fib_result* res, const struct flowi *fl, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos) @@ -1834,11 +1835,11 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb, /* put it into the cache */ hash = rt_hash(daddr, saddr, fl->iif); - return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); + return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); } -static inline int ip_mkroute_input(struct sk_buff *skb, - struct fib_result* res, +static inline int ip_mkroute_input(struct sk_buff *skb, + struct fib_result* res, const struct flowi *fl, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos) @@ -1858,7 +1859,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb, if (hopcount < 2) return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos); - + /* add all alternatives to the routing cache */ for (hop = 0; hop < hopcount; hop++) { res->nh_sel = hop; @@ -1914,10 +1915,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, .saddr = saddr, .tos = tos, .scope = RT_SCOPE_UNIVERSE, -#ifdef CONFIG_IP_ROUTE_FWMARK - .fwmark = skb->nfmark -#endif } }, + .mark = skb->mark, .iif = dev->ifindex }; unsigned flags = 0; u32 itag = 0; @@ -1989,7 +1988,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto e_nobufs; if (err == -EINVAL) goto e_inval; - + done: in_dev_put(in_dev); if (free_res) @@ -2028,9 +2027,7 @@ local_input: rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; -#ifdef CONFIG_IP_ROUTE_FWMARK - rth->fl.fl4_fwmark= skb->nfmark; -#endif + rth->fl.mark = skb->mark; rth->fl.fl4_src = saddr; rth->rt_src = saddr; #ifdef CONFIG_NET_CLS_ROUTE @@ -2074,8 +2071,8 @@ martian_destination: #endif e_hostunreach: - err = -EHOSTUNREACH; - goto done; + err = -EHOSTUNREACH; + goto done; e_inval: err = -EINVAL; @@ -2107,9 +2104,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->fl.fl4_src == saddr && rth->fl.iif == iif && rth->fl.oif == 0 && -#ifdef CONFIG_IP_ROUTE_FWMARK - rth->fl.fl4_fwmark == skb->nfmark && -#endif + rth->fl.mark == skb->mark && rth->fl.fl4_tos == tos) { rth->u.dst.lastuse = jiffies; dst_hold(&rth->u.dst); @@ -2158,11 +2153,11 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, } static inline int __mkroute_output(struct rtable **result, - struct fib_result* res, + struct fib_result* res, const struct flowi *fl, - const struct flowi *oldflp, - struct net_device *dev_out, - unsigned flags) + const struct flowi *oldflp, + struct net_device *dev_out, + unsigned flags) { struct rtable *rth; struct in_device *in_dev; @@ -2195,7 +2190,7 @@ static inline int __mkroute_output(struct rtable **result, } } else if (res->type == RTN_MULTICAST) { flags |= RTCF_MULTICAST|RTCF_LOCAL; - if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, + if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, oldflp->proto)) flags &= ~RTCF_LOCAL; /* If multicast route do not exist use @@ -2213,7 +2208,7 @@ static inline int __mkroute_output(struct rtable **result, if (!rth) { err = -ENOBUFS; goto cleanup; - } + } atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; @@ -2233,13 +2228,11 @@ static inline int __mkroute_output(struct rtable **result, rth->fl.fl4_tos = tos; rth->fl.fl4_src = oldflp->fl4_src; rth->fl.oif = oldflp->oif; -#ifdef CONFIG_IP_ROUTE_FWMARK - rth->fl.fl4_fwmark= oldflp->fl4_fwmark; -#endif + rth->fl.mark = oldflp->mark; rth->rt_dst = fl->fl4_dst; rth->rt_src = fl->fl4_src; rth->rt_iif = oldflp->oif ? : dev_out->ifindex; - /* get references to the devices that are to be hold by the routing + /* get references to the devices that are to be hold by the routing cache entry */ rth->u.dst.dev = dev_out; dev_hold(dev_out); @@ -2257,7 +2250,7 @@ static inline int __mkroute_output(struct rtable **result, } if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { rth->rt_spec_dst = fl->fl4_src; - if (flags & RTCF_LOCAL && + if (flags & RTCF_LOCAL && !(dev_out->flags & IFF_LOOPBACK)) { rth->u.dst.output = ip_mc_output; RT_CACHE_STAT_INC(out_slow_mc); @@ -2299,7 +2292,7 @@ static inline int ip_mkroute_output_def(struct rtable **rp, hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif); err = rt_intern_hash(hash, rth, rp); } - + return err; } @@ -2379,10 +2372,8 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) .scope = ((tos & RTO_ONLINK) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE), -#ifdef CONFIG_IP_ROUTE_FWMARK - .fwmark = oldflp->fl4_fwmark -#endif } }, + .mark = oldflp->mark, .iif = loopback_dev.ifindex, .oif = oldflp->oif }; struct fib_result res; @@ -2577,9 +2568,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp) rth->fl.fl4_src == flp->fl4_src && rth->fl.iif == 0 && rth->fl.oif == flp->oif && -#ifdef CONFIG_IP_ROUTE_FWMARK - rth->fl.fl4_fwmark == flp->fl4_fwmark && -#endif + rth->fl.mark == flp->mark && !((rth->fl.fl4_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK))) { @@ -2641,11 +2630,12 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, struct rtable *rt = (struct rtable*)skb->dst; struct rtmsg *r; struct nlmsghdr *nlh; - struct rta_cacheinfo ci; + long expires; + u32 id = 0, ts = 0, tsage = 0, error; nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; r = nlmsg_data(nlh); r->rtm_family = AF_INET; @@ -2661,11 +2651,11 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, if (rt->rt_flags & RTCF_NOTIFY) r->rtm_flags |= RTM_F_NOTIFY; - NLA_PUT_U32(skb, RTA_DST, rt->rt_dst); + NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); if (rt->fl.fl4_src) { r->rtm_src_len = 32; - NLA_PUT_U32(skb, RTA_SRC, rt->fl.fl4_src); + NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); } if (rt->u.dst.dev) NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex); @@ -2678,30 +2668,23 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg); #endif if (rt->fl.iif) - NLA_PUT_U32(skb, RTA_PREFSRC, rt->rt_spec_dst); + NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); else if (rt->rt_src != rt->fl.fl4_src) - NLA_PUT_U32(skb, RTA_PREFSRC, rt->rt_src); + NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); if (rt->rt_dst != rt->rt_gateway) - NLA_PUT_U32(skb, RTA_GATEWAY, rt->rt_gateway); + NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) goto nla_put_failure; - ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse); - ci.rta_used = rt->u.dst.__use; - ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt); - if (rt->u.dst.expires) - ci.rta_expires = jiffies_to_clock_t(rt->u.dst.expires - jiffies); - else - ci.rta_expires = 0; - ci.rta_error = rt->u.dst.error; - ci.rta_id = ci.rta_ts = ci.rta_tsage = 0; + error = rt->u.dst.error; + expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; if (rt->peer) { - ci.rta_id = rt->peer->ip_id_count; + id = rt->peer->ip_id_count; if (rt->peer->tcp_ts_stamp) { - ci.rta_ts = rt->peer->tcp_ts; - ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp; + ts = rt->peer->tcp_ts; + tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp; } } @@ -2720,7 +2703,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, } else { if (err == -EMSGSIZE) goto nla_put_failure; - ci.rta_error = err; + error = err; } } } else @@ -2728,12 +2711,15 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); } - NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); + if (rtnl_put_cacheinfo(skb, &rt->u.dst, id, ts, tsage, + expires, error) < 0) + goto nla_put_failure; return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) @@ -2768,8 +2754,8 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) skb->nh.iph->protocol = IPPROTO_ICMP; skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); - src = tb[RTA_SRC] ? nla_get_u32(tb[RTA_SRC]) : 0; - dst = tb[RTA_DST] ? nla_get_u32(tb[RTA_DST]) : 0; + src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0; + dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; if (iif) { @@ -2844,7 +2830,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; skb->dst = dst_clone(&rt->u.dst); if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_NEWROUTE, + cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1, NLM_F_MULTI) <= 0) { dst_release(xchg(&skb->dst, NULL)); rcu_read_unlock_bh(); @@ -2877,7 +2863,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write, proc_dointvec(ctl, write, filp, buffer, lenp, ppos); rt_cache_flush(flush_delay); return 0; - } + } return -EINVAL; } @@ -2888,20 +2874,19 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, - size_t newlen, - void **context) + size_t newlen) { int delay; if (newlen != sizeof(int)) return -EINVAL; if (get_user(delay, (int __user *)newval)) - return -EFAULT; - rt_cache_flush(delay); + return -EFAULT; + rt_cache_flush(delay); return 0; } ctl_table ipv4_route_table[] = { - { + { .ctl_name = NET_IPV4_ROUTE_FLUSH, .procname = "flush", .data = &flush_delay, @@ -2946,7 +2931,7 @@ ctl_table ipv4_route_table[] = { }, { /* Deprecated. Use gc_min_interval_ms */ - + .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL, .procname = "gc_min_interval", .data = &ip_rt_gc_min_interval, @@ -3195,8 +3180,8 @@ int __init ip_rt_init(void) { struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */ if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) || - !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO, - proc_net_stat))) { + !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO, + proc_net_stat))) { return -ENOMEM; } rtstat_pde->proc_fops = &rt_cpu_seq_fops;