X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fipv4%2Fnetfilter%2Fip_tables.c;h=3856aa3f231e8648dff647f048838f9474330b4b;hb=e5afbba1869a5d9509c61f8962be9bdebf95f7d3;hp=f5b66ec18b0d6a9ccff81bb90b7f6c3253fbe735;hpb=e79ec50b9587c175f65f98550d66ad5b96c05dd9;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index f5b66ec..3856aa3 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -53,7 +53,7 @@ MODULE_DESCRIPTION("IPv4 packet filter"); do { \ if (!(x)) \ printk("IP_NF_ASSERT: %s:%s:%u\n", \ - __FUNCTION__, __FILE__, __LINE__); \ + __func__, __FILE__, __LINE__); \ } while(0) #else #define IP_NF_ASSERT(x) @@ -75,6 +75,7 @@ do { \ Hence the start of any table is given by get_table() below. */ /* Returns whether matches rule or not. */ +/* Performance critical - called for every packet */ static inline bool ip_packet_match(const struct iphdr *ip, const char *indev, @@ -82,7 +83,6 @@ ip_packet_match(const struct iphdr *ip, const struct ipt_ip *ipinfo, int isfrag) { - size_t i; unsigned long ret; #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) @@ -93,25 +93,16 @@ ip_packet_match(const struct iphdr *ip, IPT_INV_DSTIP)) { dprintf("Source or dest mismatch.\n"); - dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n", - NIPQUAD(ip->saddr), - NIPQUAD(ipinfo->smsk.s_addr), - NIPQUAD(ipinfo->src.s_addr), + dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n", + &ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr, ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : ""); - dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n", - NIPQUAD(ip->daddr), - NIPQUAD(ipinfo->dmsk.s_addr), - NIPQUAD(ipinfo->dst.s_addr), + dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n", + &ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr, ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : ""); return false; } - /* Look for ifname matches; this should unroll nicely. */ - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - ret |= (((const unsigned long *)indev)[i] - ^ ((const unsigned long *)ipinfo->iniface)[i]) - & ((const unsigned long *)ipinfo->iniface_mask)[i]; - } + ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask); if (FWINV(ret != 0, IPT_INV_VIA_IN)) { dprintf("VIA in mismatch (%s vs %s).%s\n", @@ -120,11 +111,7 @@ ip_packet_match(const struct iphdr *ip, return false; } - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - ret |= (((const unsigned long *)outdev)[i] - ^ ((const unsigned long *)ipinfo->outiface)[i]) - & ((const unsigned long *)ipinfo->outiface_mask)[i]; - } + ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask); if (FWINV(ret != 0, IPT_INV_VIA_OUT)) { dprintf("VIA out mismatch (%s vs %s).%s\n", @@ -153,7 +140,7 @@ ip_packet_match(const struct iphdr *ip, return true; } -static inline bool +static bool ip_checkentry(const struct ipt_ip *ip) { if (ip->flags & ~IPT_F_MASK) { @@ -170,35 +157,31 @@ ip_checkentry(const struct ipt_ip *ip) } static unsigned int -ipt_error(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +ipt_error(struct sk_buff *skb, const struct xt_target_param *par) { if (net_ratelimit()) - printk("ip_tables: error: `%s'\n", (char *)targinfo); + printk("ip_tables: error: `%s'\n", + (const char *)par->targinfo); return NF_DROP; } -static inline -bool do_match(struct ipt_entry_match *m, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int offset, - bool *hotdrop) +/* Performance critical - called for every packet */ +static inline bool +do_match(struct ipt_entry_match *m, const struct sk_buff *skb, + struct xt_match_param *par) { + par->match = m->u.kernel.match; + par->matchinfo = m->data; + /* Stop iteration if it doesn't match */ - if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data, - offset, ip_hdrlen(skb), hotdrop)) + if (!m->u.kernel.match->match(skb, par)) return true; else return false; } +/* Performance critical */ static inline struct ipt_entry * get_entry(void *base, unsigned int offset) { @@ -206,6 +189,7 @@ get_entry(void *base, unsigned int offset) } /* All zeroes == unconditional rule. */ +/* Mildly perf critical (only if packet tracing is on) */ static inline int unconditional(const struct ipt_ip *ip) { @@ -221,7 +205,7 @@ unconditional(const struct ipt_ip *ip) #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) -static const char *hooknames[] = { +static const char *const hooknames[] = { [NF_INET_PRE_ROUTING] = "PREROUTING", [NF_INET_LOCAL_IN] = "INPUT", [NF_INET_FORWARD] = "FORWARD", @@ -235,7 +219,7 @@ enum nf_ip_trace_comments { NF_IP_TRACE_COMMENT_POLICY, }; -static const char *comments[] = { +static const char *const comments[] = { [NF_IP_TRACE_COMMENT_RULE] = "rule", [NF_IP_TRACE_COMMENT_RETURN] = "return", [NF_IP_TRACE_COMMENT_POLICY] = "policy", @@ -251,10 +235,11 @@ static struct nf_loginfo trace_loginfo = { }, }; +/* Mildly perf critical (only if packet tracing is on) */ static inline int get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, - char *hookname, char **chainname, - char **comment, unsigned int *rulenum) + const char *hookname, const char **chainname, + const char **comment, unsigned int *rulenum) { struct ipt_standard_target *t = (void *)ipt_get_target(s); @@ -272,8 +257,8 @@ get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, && unconditional(&s->ip)) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname - ? (char *)comments[NF_IP_TRACE_COMMENT_POLICY] - : (char *)comments[NF_IP_TRACE_COMMENT_RETURN]; + ? comments[NF_IP_TRACE_COMMENT_POLICY] + : comments[NF_IP_TRACE_COMMENT_RETURN]; } return 1; } else @@ -286,20 +271,20 @@ static void trace_packet(struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, - char *tablename, + const char *tablename, struct xt_table_info *private, struct ipt_entry *e) { void *table_base; - struct ipt_entry *root; - char *hookname, *chainname, *comment; + const struct ipt_entry *root; + const char *hookname, *chainname, *comment; unsigned int rulenum = 0; - table_base = (void *)private->entries[smp_processor_id()]; + table_base = private->entries[smp_processor_id()]; root = get_entry(table_base, private->hook_entry[hook]); - hookname = chainname = (char *)hooknames[hook]; - comment = (char *)comments[NF_IP_TRACE_COMMENT_RULE]; + hookname = chainname = hooknames[hook]; + comment = comments[NF_IP_TRACE_COMMENT_RULE]; IPT_ENTRY_ITERATE(root, private->size - private->hook_entry[hook], @@ -312,6 +297,12 @@ static void trace_packet(struct sk_buff *skb, } #endif +static inline __pure +struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry) +{ + return (void *)entry + entry->next_offset; +} + /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int ipt_do_table(struct sk_buff *skb, @@ -320,10 +311,10 @@ ipt_do_table(struct sk_buff *skb, const struct net_device *out, struct xt_table *table) { +#define tb_comefrom ((struct ipt_entry *)table_base)->comefrom + static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); - u_int16_t offset; - struct iphdr *ip; - u_int16_t datalen; + const struct iphdr *ip; bool hotdrop = false; /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; @@ -331,10 +322,11 @@ ipt_do_table(struct sk_buff *skb, void *table_base; struct ipt_entry *e, *back; struct xt_table_info *private; + struct xt_match_param mtpar; + struct xt_target_param tgpar; /* Initialization */ ip = ip_hdr(skb); - datalen = skb->len - ip->ihl * 4; indev = in ? in->name : nulldevname; outdev = out ? out->name : nulldevname; /* We handle fragments by dealing with the first fragment as @@ -343,110 +335,103 @@ ipt_do_table(struct sk_buff *skb, * things we don't know, ie. tcp syn flag or ports). If the * rule is also a fragment-specific rule, non-fragments won't * match it. */ - offset = ntohs(ip->frag_off) & IP_OFFSET; + mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; + mtpar.thoff = ip_hdrlen(skb); + mtpar.hotdrop = &hotdrop; + mtpar.in = tgpar.in = in; + mtpar.out = tgpar.out = out; + mtpar.family = tgpar.family = NFPROTO_IPV4; + mtpar.hooknum = tgpar.hooknum = hook; - read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); + xt_info_rdlock_bh(); private = table->private; - table_base = (void *)private->entries[smp_processor_id()]; + table_base = private->entries[smp_processor_id()]; + e = get_entry(table_base, private->hook_entry[hook]); /* For return from builtin chain */ back = get_entry(table_base, private->underflow[hook]); do { + struct ipt_entry_target *t; + IP_NF_ASSERT(e); IP_NF_ASSERT(back); - if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) { - struct ipt_entry_target *t; - - if (IPT_MATCH_ITERATE(e, do_match, - skb, in, out, - offset, &hotdrop) != 0) - goto no_match; + if (!ip_packet_match(ip, indev, outdev, + &e->ip, mtpar.fragoff) || + IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) { + e = ipt_next_entry(e); + continue; + } - ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); + ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); - t = ipt_get_target(e); - IP_NF_ASSERT(t->u.kernel.target); + t = ipt_get_target(e); + IP_NF_ASSERT(t->u.kernel.target); #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) - /* The packet is traced: log it */ - if (unlikely(skb->nf_trace)) - trace_packet(skb, hook, in, out, - table->name, private, e); + /* The packet is traced: log it */ + if (unlikely(skb->nf_trace)) + trace_packet(skb, hook, in, out, + table->name, private, e); #endif - /* Standard target? */ - if (!t->u.kernel.target->target) { - int v; - - v = ((struct ipt_standard_target *)t)->verdict; - if (v < 0) { - /* Pop from stack? */ - if (v != IPT_RETURN) { - verdict = (unsigned)(-v) - 1; - break; - } - e = back; - back = get_entry(table_base, - back->comefrom); - continue; - } - if (table_base + v != (void *)e + e->next_offset - && !(e->ip.flags & IPT_F_GOTO)) { - /* Save old back ptr in next entry */ - struct ipt_entry *next - = (void *)e + e->next_offset; - next->comefrom - = (void *)back - table_base; - /* set back pointer to next entry */ - back = next; + /* Standard target? */ + if (!t->u.kernel.target->target) { + int v; + + v = ((struct ipt_standard_target *)t)->verdict; + if (v < 0) { + /* Pop from stack? */ + if (v != IPT_RETURN) { + verdict = (unsigned)(-v) - 1; + break; } + e = back; + back = get_entry(table_base, back->comefrom); + continue; + } + if (table_base + v != ipt_next_entry(e) + && !(e->ip.flags & IPT_F_GOTO)) { + /* Save old back ptr in next entry */ + struct ipt_entry *next = ipt_next_entry(e); + next->comefrom = (void *)back - table_base; + /* set back pointer to next entry */ + back = next; + } + + e = get_entry(table_base, v); + continue; + } + + /* Targets which reenter must return + abs. verdicts */ + tgpar.target = t->u.kernel.target; + tgpar.targinfo = t->data; - e = get_entry(table_base, v); - } else { - /* Targets which reenter must return - abs. verdicts */ -#ifdef CONFIG_NETFILTER_DEBUG - ((struct ipt_entry *)table_base)->comefrom - = 0xeeeeeeec; -#endif - verdict = t->u.kernel.target->target(skb, - in, out, - hook, - t->u.kernel.target, - t->data); #ifdef CONFIG_NETFILTER_DEBUG - if (((struct ipt_entry *)table_base)->comefrom - != 0xeeeeeeec - && verdict == IPT_CONTINUE) { - printk("Target %s reentered!\n", - t->u.kernel.target->name); - verdict = NF_DROP; - } - ((struct ipt_entry *)table_base)->comefrom - = 0x57acc001; + tb_comefrom = 0xeeeeeeec; #endif - /* Target might have changed stuff. */ - ip = ip_hdr(skb); - datalen = skb->len - ip->ihl * 4; - - if (verdict == IPT_CONTINUE) - e = (void *)e + e->next_offset; - else - /* Verdict */ - break; - } - } else { - - no_match: - e = (void *)e + e->next_offset; + verdict = t->u.kernel.target->target(skb, &tgpar); +#ifdef CONFIG_NETFILTER_DEBUG + if (tb_comefrom != 0xeeeeeeec && verdict == IPT_CONTINUE) { + printk("Target %s reentered!\n", + t->u.kernel.target->name); + verdict = NF_DROP; } + tb_comefrom = 0x57acc001; +#endif + /* Target might have changed stuff. */ + ip = ip_hdr(skb); + if (verdict == IPT_CONTINUE) + e = ipt_next_entry(e); + else + /* Verdict */ + break; } while (!hotdrop); - - read_unlock_bh(&table->lock); + xt_info_rdunlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -455,6 +440,8 @@ ipt_do_table(struct sk_buff *skb, return NF_DROP; else return verdict; #endif + +#undef tb_comefrom } /* Figures out from what hook each rule can be called: returns 0 if @@ -497,7 +484,9 @@ mark_source_chains(struct xt_table_info *newinfo, && unconditional(&e->ip)) || visited) { unsigned int oldpos, size; - if (t->verdict < -NF_MAX_VERDICT - 1) { + if ((strcmp(t->target.u.user.name, + IPT_STANDARD_TARGET) == 0) && + t->verdict < -NF_MAX_VERDICT - 1) { duprintf("mark_source_chains: bad " "negative verdict (%i)\n", t->verdict); @@ -567,19 +556,24 @@ mark_source_chains(struct xt_table_info *newinfo, return 1; } -static inline int +static int cleanup_match(struct ipt_entry_match *m, unsigned int *i) { + struct xt_mtdtor_param par; + if (i && (*i)-- == 0) return 1; - if (m->u.kernel.match->destroy) - m->u.kernel.match->destroy(m->u.kernel.match, m->data); - module_put(m->u.kernel.match->me); + par.match = m->u.kernel.match; + par.matchinfo = m->data; + par.family = NFPROTO_IPV4; + if (par.match->destroy != NULL) + par.match->destroy(&par); + module_put(par.match->me); return 0; } -static inline int +static int check_entry(struct ipt_entry *e, const char *name) { struct ipt_entry_target *t; @@ -600,34 +594,29 @@ check_entry(struct ipt_entry *e, const char *name) return 0; } -static inline int check_match(struct ipt_entry_match *m, const char *name, - const struct ipt_ip *ip, - unsigned int hookmask, unsigned int *i) +static int +check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, + unsigned int *i) { - struct xt_match *match; + const struct ipt_ip *ip = par->entryinfo; int ret; - match = m->u.kernel.match; - ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m), - name, hookmask, ip->proto, - ip->invflags & IPT_INV_PROTO); - if (!ret && m->u.kernel.match->checkentry - && !m->u.kernel.match->checkentry(name, ip, match, m->data, - hookmask)) { + par->match = m->u.kernel.match; + par->matchinfo = m->data; + + ret = xt_check_match(par, m->u.match_size - sizeof(*m), + ip->proto, ip->invflags & IPT_INV_PROTO); + if (ret < 0) { duprintf("ip_tables: check failed for `%s'.\n", - m->u.kernel.match->name); - ret = -EINVAL; + par.match->name); + return ret; } - if (!ret) - (*i)++; - return ret; + ++*i; + return 0; } -static inline int -find_check_match(struct ipt_entry_match *m, - const char *name, - const struct ipt_ip *ip, - unsigned int hookmask, +static int +find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, unsigned int *i) { struct xt_match *match; @@ -642,7 +631,7 @@ find_check_match(struct ipt_entry_match *m, } m->u.kernel.match = match; - ret = check_match(m, name, ip, hookmask, i); + ret = check_match(m, par, i); if (ret) goto err; @@ -652,28 +641,30 @@ err: return ret; } -static inline int check_target(struct ipt_entry *e, const char *name) +static int check_target(struct ipt_entry *e, const char *name) { - struct ipt_entry_target *t; - struct xt_target *target; + struct ipt_entry_target *t = ipt_get_target(e); + struct xt_tgchk_param par = { + .table = name, + .entryinfo = e, + .target = t->u.kernel.target, + .targinfo = t->data, + .hook_mask = e->comefrom, + .family = NFPROTO_IPV4, + }; int ret; - t = ipt_get_target(e); - target = t->u.kernel.target; - ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), - name, e->comefrom, e->ip.proto, - e->ip.invflags & IPT_INV_PROTO); - if (!ret && t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(name, e, target, t->data, - e->comefrom)) { + ret = xt_check_target(&par, t->u.target_size - sizeof(*t), + e->ip.proto, e->ip.invflags & IPT_INV_PROTO); + if (ret < 0) { duprintf("ip_tables: check failed for `%s'.\n", t->u.kernel.target->name); - ret = -EINVAL; + return ret; } - return ret; + return 0; } -static inline int +static int find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, unsigned int *i) { @@ -681,14 +672,18 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, struct xt_target *target; int ret; unsigned int j; + struct xt_mtchk_param mtpar; ret = check_entry(e, name); if (ret) return ret; j = 0; - ret = IPT_MATCH_ITERATE(e, find_check_match, name, &e->ip, - e->comefrom, &j); + mtpar.table = name; + mtpar.entryinfo = &e->ip; + mtpar.hook_mask = e->comefrom; + mtpar.family = NFPROTO_IPV4; + ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j); if (ret != 0) goto cleanup_matches; @@ -717,7 +712,7 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, return ret; } -static inline int +static int check_entry_size_and_hooks(struct ipt_entry *e, struct xt_table_info *newinfo, unsigned char *base, @@ -760,9 +755,10 @@ check_entry_size_and_hooks(struct ipt_entry *e, return 0; } -static inline int +static int cleanup_entry(struct ipt_entry *e, unsigned int *i) { + struct xt_tgdtor_param par; struct ipt_entry_target *t; if (i && (*i)-- == 0) @@ -771,9 +767,13 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i) /* Cleanup all matches */ IPT_MATCH_ITERATE(e, cleanup_match, NULL); t = ipt_get_target(e); - if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data); - module_put(t->u.kernel.target->me); + + par.target = t->u.kernel.target; + par.targinfo = t->data; + par.family = NFPROTO_IPV4; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); return 0; } @@ -892,10 +892,13 @@ get_counters(const struct xt_table_info *t, /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters - * with data used by 'current' CPU - * We dont care about preemption here. + * with data used by 'current' CPU. + * + * Bottom half has to be disabled to prevent deadlock + * if new softirq were to run and call ipt_do_table */ - curcpu = raw_smp_processor_id(); + local_bh_disable(); + curcpu = smp_processor_id(); i = 0; IPT_ENTRY_ITERATE(t->entries[curcpu], @@ -908,15 +911,18 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + xt_info_wrlock(cpu); IPT_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, &i); + xt_info_wrunlock(cpu); } + local_bh_enable(); } -static inline struct xt_counters * alloc_counters(struct xt_table *table) +static struct xt_counters * alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; @@ -931,10 +937,7 @@ static inline struct xt_counters * alloc_counters(struct xt_table *table) if (counters == NULL) return ERR_PTR(-ENOMEM); - /* First, sum counters... */ - write_lock_bh(&table->lock); get_counters(private, counters); - write_unlock_bh(&table->lock); return counters; } @@ -947,9 +950,9 @@ copy_entries_to_user(unsigned int total_size, unsigned int off, num; struct ipt_entry *e; struct xt_counters *counters; - struct xt_table_info *private = table->private; + const struct xt_table_info *private = table->private; int ret = 0; - void *loc_cpu_entry; + const void *loc_cpu_entry; counters = alloc_counters(table); if (IS_ERR(counters)) @@ -969,8 +972,8 @@ copy_entries_to_user(unsigned int total_size, /* ... then go back and fix counters and names */ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ unsigned int i; - struct ipt_entry_match *m; - struct ipt_entry_target *t; + const struct ipt_entry_match *m; + const struct ipt_entry_target *t; e = (struct ipt_entry *)(loc_cpu_entry + off); if (copy_to_user(userptr + off @@ -1086,7 +1089,7 @@ static int compat_table_info(const struct xt_table_info *info, } #endif -static int get_info(void __user *user, int *len, int compat) +static int get_info(struct net *net, void __user *user, int *len, int compat) { char name[IPT_TABLE_MAXNAMELEN]; struct xt_table *t; @@ -1106,11 +1109,11 @@ static int get_info(void __user *user, int *len, int compat) if (compat) xt_compat_lock(AF_INET); #endif - t = try_then_request_module(xt_find_table_lock(AF_INET, name), + t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), "iptable_%s", name); if (t && !IS_ERR(t)) { struct ipt_getinfo info; - struct xt_table_info *private = t->private; + const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT if (compat) { @@ -1146,7 +1149,7 @@ static int get_info(void __user *user, int *len, int compat) } static int -get_entries(struct ipt_get_entries __user *uptr, int *len) +get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len) { int ret; struct ipt_get_entries get; @@ -1164,9 +1167,9 @@ get_entries(struct ipt_get_entries __user *uptr, int *len) return -EINVAL; } - t = xt_find_table_lock(AF_INET, get.name); + t = xt_find_table_lock(net, AF_INET, get.name); if (t && !IS_ERR(t)) { - struct xt_table_info *private = t->private; + const struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", private->number); if (get.size == private->size) ret = copy_entries_to_user(private->size, @@ -1174,7 +1177,7 @@ get_entries(struct ipt_get_entries __user *uptr, int *len) else { duprintf("get_entries: I've got %u not %u!\n", private->size, get.size); - ret = -EINVAL; + ret = -EAGAIN; } module_put(t->me); xt_table_unlock(t); @@ -1185,7 +1188,7 @@ get_entries(struct ipt_get_entries __user *uptr, int *len) } static int -__do_replace(const char *name, unsigned int valid_hooks, +__do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table_info *newinfo, unsigned int num_counters, void __user *counters_ptr) { @@ -1202,7 +1205,7 @@ __do_replace(const char *name, unsigned int valid_hooks, goto out; } - t = try_then_request_module(xt_find_table_lock(AF_INET, name), + t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), "iptable_%s", name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; @@ -1231,8 +1234,9 @@ __do_replace(const char *name, unsigned int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, @@ -1255,7 +1259,7 @@ __do_replace(const char *name, unsigned int valid_hooks, } static int -do_replace(void __user *user, unsigned int len) +do_replace(struct net *net, void __user *user, unsigned int len) { int ret; struct ipt_replace tmp; @@ -1289,7 +1293,7 @@ do_replace(void __user *user, unsigned int len) duprintf("ip_tables: Translated table\n"); - ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo, + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, tmp.counters); if (ret) goto free_newinfo_untrans; @@ -1304,20 +1308,11 @@ do_replace(void __user *user, unsigned int len) /* We're lazy, and add to the first CPU; overflow works its fey magic * and everything is OK. */ -static inline int +static int add_counter_to_entry(struct ipt_entry *e, const struct xt_counters addme[], unsigned int *i) { -#if 0 - duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n", - *i, - (long unsigned int)e->counters.pcnt, - (long unsigned int)e->counters.bcnt, - (long unsigned int)addme[*i].pcnt, - (long unsigned int)addme[*i].bcnt); -#endif - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); (*i)++; @@ -1325,17 +1320,17 @@ add_counter_to_entry(struct ipt_entry *e, } static int -do_add_counters(void __user *user, unsigned int len, int compat) +do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { - unsigned int i; + unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; - char *name; + const char *name; int size; void *ptmp; struct xt_table *t; - struct xt_table_info *private; + const struct xt_table_info *private; int ret = 0; void *loc_cpu_entry; #ifdef CONFIG_COMPAT @@ -1377,13 +1372,13 @@ do_add_counters(void __user *user, unsigned int len, int compat) goto free; } - t = xt_find_table_lock(AF_INET, name); + t = xt_find_table_lock(net, AF_INET, name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; } - write_lock_bh(&t->lock); + local_bh_disable(); private = t->private; if (private->number != num_counters) { ret = -EINVAL; @@ -1392,14 +1387,17 @@ do_add_counters(void __user *user, unsigned int len, int compat) i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + curcpu = smp_processor_id(); + loc_cpu_entry = private->entries[curcpu]; + xt_info_wrlock(curcpu); IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, paddc, &i); + xt_info_wrunlock(curcpu); unlock_up_free: - write_unlock_bh(&t->lock); + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: @@ -1423,7 +1421,7 @@ struct compat_ipt_replace { static int compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, - compat_uint_t *size, struct xt_counters *counters, + unsigned int *size, struct xt_counters *counters, unsigned int *i) { struct ipt_entry_target *t; @@ -1465,12 +1463,12 @@ out: return ret; } -static inline int +static int compat_find_calc_match(struct ipt_entry_match *m, const char *name, const struct ipt_ip *ip, unsigned int hookmask, - int *size, int *i) + int *size, unsigned int *i) { struct xt_match *match; @@ -1489,7 +1487,7 @@ compat_find_calc_match(struct ipt_entry_match *m, return 0; } -static inline int +static int compat_release_match(struct ipt_entry_match *m, unsigned int *i) { if (i && (*i)-- == 0) @@ -1499,7 +1497,7 @@ compat_release_match(struct ipt_entry_match *m, unsigned int *i) return 0; } -static inline int +static int compat_release_entry(struct compat_ipt_entry *e, unsigned int *i) { struct ipt_entry_target *t; @@ -1514,7 +1512,7 @@ compat_release_entry(struct compat_ipt_entry *e, unsigned int *i) return 0; } -static inline int +static int check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, struct xt_table_info *newinfo, unsigned int *size, @@ -1528,7 +1526,8 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, struct ipt_entry_target *t; struct xt_target *target; unsigned int entry_offset; - int ret, off, h, j; + unsigned int j; + int ret, off, h; duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 @@ -1637,14 +1636,20 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, return ret; } -static inline int compat_check_entry(struct ipt_entry *e, const char *name, +static int +compat_check_entry(struct ipt_entry *e, const char *name, unsigned int *i) { - int j, ret; + struct xt_mtchk_param mtpar; + unsigned int j; + int ret; j = 0; - ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, - e->comefrom, &j); + mtpar.table = name; + mtpar.entryinfo = &e->ip; + mtpar.hook_mask = e->comefrom; + mtpar.family = NFPROTO_IPV4; + ret = IPT_MATCH_ITERATE(e, check_match, &mtpar, &j); if (ret) goto cleanup_matches; @@ -1782,7 +1787,7 @@ out_unlock: } static int -compat_do_replace(void __user *user, unsigned int len) +compat_do_replace(struct net *net, void __user *user, unsigned int len) { int ret; struct compat_ipt_replace tmp; @@ -1819,7 +1824,7 @@ compat_do_replace(void __user *user, unsigned int len) duprintf("compat_do_replace: Translated table\n"); - ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo, + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, compat_ptr(tmp.counters)); if (ret) goto free_newinfo_untrans; @@ -1843,11 +1848,11 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, switch (cmd) { case IPT_SO_SET_REPLACE: - ret = compat_do_replace(user, len); + ret = compat_do_replace(sock_net(sk), user, len); break; case IPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(user, len, 1); + ret = do_add_counters(sock_net(sk), user, len, 1); break; default: @@ -1869,11 +1874,11 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *userptr) { struct xt_counters *counters; - struct xt_table_info *private = table->private; + const struct xt_table_info *private = table->private; void __user *pos; unsigned int size; int ret = 0; - void *loc_cpu_entry; + const void *loc_cpu_entry; unsigned int i = 0; counters = alloc_counters(table); @@ -1896,7 +1901,8 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, } static int -compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len) +compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, + int *len) { int ret; struct compat_ipt_get_entries get; @@ -1917,9 +1923,9 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len) } xt_compat_lock(AF_INET); - t = xt_find_table_lock(AF_INET, get.name); + t = xt_find_table_lock(net, AF_INET, get.name); if (t && !IS_ERR(t)) { - struct xt_table_info *private = t->private; + const struct xt_table_info *private = t->private; struct xt_table_info info; duprintf("t->private->number = %u\n", private->number); ret = compat_table_info(private, &info); @@ -1929,7 +1935,7 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len) } else if (!ret) { duprintf("compat_get_entries: I've got %u not %u!\n", private->size, get.size); - ret = -EINVAL; + ret = -EAGAIN; } xt_compat_flush_offsets(AF_INET); module_put(t->me); @@ -1953,10 +1959,10 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) switch (cmd) { case IPT_SO_GET_INFO: - ret = get_info(user, len, 1); + ret = get_info(sock_net(sk), user, len, 1); break; case IPT_SO_GET_ENTRIES: - ret = compat_get_entries(user, len); + ret = compat_get_entries(sock_net(sk), user, len); break; default: ret = do_ipt_get_ctl(sk, cmd, user, len); @@ -1975,11 +1981,11 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) switch (cmd) { case IPT_SO_SET_REPLACE: - ret = do_replace(user, len); + ret = do_replace(sock_net(sk), user, len); break; case IPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(user, len, 0); + ret = do_add_counters(sock_net(sk), user, len, 0); break; default: @@ -2000,11 +2006,11 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) switch (cmd) { case IPT_SO_GET_INFO: - ret = get_info(user, len, 0); + ret = get_info(sock_net(sk), user, len, 0); break; case IPT_SO_GET_ENTRIES: - ret = get_entries(user, len); + ret = get_entries(sock_net(sk), user, len); break; case IPT_SO_GET_REVISION_MATCH: @@ -2041,17 +2047,21 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } -int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) +struct xt_table *ipt_register_table(struct net *net, struct xt_table *table, + const struct ipt_replace *repl) { int ret; struct xt_table_info *newinfo; struct xt_table_info bootstrap = { 0, 0, 0, { 0 }, { 0 }, { } }; void *loc_cpu_entry; + struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); - if (!newinfo) - return -ENOMEM; + if (!newinfo) { + ret = -ENOMEM; + goto out; + } /* choose the copy on our node/cpu, but dont care about preemption */ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; @@ -2062,30 +2072,36 @@ int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) repl->num_entries, repl->hook_entry, repl->underflow); - if (ret != 0) { - xt_free_table_info(newinfo); - return ret; - } + if (ret != 0) + goto out_free; - ret = xt_register_table(table, &bootstrap, newinfo); - if (ret != 0) { - xt_free_table_info(newinfo); - return ret; + new_table = xt_register_table(net, table, &bootstrap, newinfo); + if (IS_ERR(new_table)) { + ret = PTR_ERR(new_table); + goto out_free; } - return 0; + return new_table; + +out_free: + xt_free_table_info(newinfo); +out: + return ERR_PTR(ret); } void ipt_unregister_table(struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; + struct module *table_owner = table->me; private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL); + if (private->number > private->initial_entries) + module_put(table_owner); xt_free_table_info(private); } @@ -2101,29 +2117,23 @@ icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, } static bool -icmp_match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +icmp_match(const struct sk_buff *skb, const struct xt_match_param *par) { - struct icmphdr _icmph, *ic; - const struct ipt_icmp *icmpinfo = matchinfo; + const struct icmphdr *ic; + struct icmphdr _icmph; + const struct ipt_icmp *icmpinfo = par->matchinfo; /* Must not be a fragment. */ - if (offset) + if (par->fragoff != 0) return false; - ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph); + ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); if (ic == NULL) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ duprintf("Dropping evil ICMP tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -2134,15 +2144,9 @@ icmp_match(const struct sk_buff *skb, !!(icmpinfo->invflags&IPT_ICMP_INV)); } -/* Called when user tries to insert an entry of this type. */ -static bool -icmp_checkentry(const char *tablename, - const void *entry, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +static bool icmp_checkentry(const struct xt_mtchk_param *par) { - const struct ipt_icmp *icmpinfo = matchinfo; + const struct ipt_icmp *icmpinfo = par->matchinfo; /* Must specify no unknown invflags */ return !(icmpinfo->invflags & ~IPT_ICMP_INV); @@ -2152,7 +2156,7 @@ icmp_checkentry(const char *tablename, static struct xt_target ipt_standard_target __read_mostly = { .name = IPT_STANDARD_TARGET, .targetsize = sizeof(int), - .family = AF_INET, + .family = NFPROTO_IPV4, #ifdef CONFIG_COMPAT .compatsize = sizeof(compat_int_t), .compat_from_user = compat_standard_from_user, @@ -2164,7 +2168,7 @@ static struct xt_target ipt_error_target __read_mostly = { .name = IPT_ERROR_TARGET, .target = ipt_error, .targetsize = IPT_FUNCTION_MAXNAMELEN, - .family = AF_INET, + .family = NFPROTO_IPV4, }; static struct nf_sockopt_ops ipt_sockopts = { @@ -2190,14 +2194,29 @@ static struct xt_match icmp_matchstruct __read_mostly = { .matchsize = sizeof(struct ipt_icmp), .checkentry = icmp_checkentry, .proto = IPPROTO_ICMP, - .family = AF_INET, + .family = NFPROTO_IPV4, +}; + +static int __net_init ip_tables_net_init(struct net *net) +{ + return xt_proto_init(net, NFPROTO_IPV4); +} + +static void __net_exit ip_tables_net_exit(struct net *net) +{ + xt_proto_fini(net, NFPROTO_IPV4); +} + +static struct pernet_operations ip_tables_net_ops = { + .init = ip_tables_net_init, + .exit = ip_tables_net_exit, }; static int __init ip_tables_init(void) { int ret; - ret = xt_proto_init(AF_INET); + ret = register_pernet_subsys(&ip_tables_net_ops); if (ret < 0) goto err1; @@ -2227,7 +2246,7 @@ err4: err3: xt_unregister_target(&ipt_standard_target); err2: - xt_proto_fini(AF_INET); + unregister_pernet_subsys(&ip_tables_net_ops); err1: return ret; } @@ -2240,7 +2259,7 @@ static void __exit ip_tables_fini(void) xt_unregister_target(&ipt_error_target); xt_unregister_target(&ipt_standard_target); - xt_proto_fini(AF_INET); + unregister_pernet_subsys(&ip_tables_net_ops); } EXPORT_SYMBOL(ipt_register_table);