X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fipv4%2Fnetfilter%2Fipt_CLUSTERIP.c;h=018fea3fcb5f5df2ea94fa49930cc078e13ad823;hb=a8d0f9526ff8510d6fa5e708ef5386af19503299;hp=adbf4d752d0f1a1170c75037feb3e36abe7f39d1;hpb=4451362445b2d83886003f1d739b94e4f000eeeb;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index adbf4d7..018fea3 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -10,9 +10,9 @@ * */ #include -#include #include #include +#include #include #include #include @@ -28,9 +28,9 @@ #include #include -#include +#include -#define CLUSTERIP_VERSION "0.7" +#define CLUSTERIP_VERSION "0.8" #define DEBUG_CLUSTERIP @@ -40,8 +40,6 @@ #define DEBUGP #endif -#define ASSERT_READ_LOCK(x) - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("iptables target for CLUSTERIP"); @@ -52,12 +50,11 @@ struct clusterip_config { atomic_t entries; /* number of entries/rules * referencing us */ - u_int32_t clusterip; /* the IP address */ + __be32 clusterip; /* the IP address */ u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ struct net_device *dev; /* device */ u_int16_t num_total_nodes; /* total number of nodes */ - u_int16_t num_local_nodes; /* number of local nodes */ - u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; /* node number array */ + unsigned long local_nodes; /* node number array */ #ifdef CONFIG_PROC_FS struct proc_dir_entry *pde; /* proc dir entry */ @@ -68,8 +65,7 @@ struct clusterip_config { static LIST_HEAD(clusterip_configs); -/* clusterip_lock protects the clusterip_configs list _AND_ the configurable - * data within all structurses (num_local_nodes, local_nodes[]) */ +/* clusterip_lock protects the clusterip_configs list */ static DEFINE_RWLOCK(clusterip_lock); #ifdef CONFIG_PROC_FS @@ -121,11 +117,10 @@ clusterip_config_entry_put(struct clusterip_config *c) } static struct clusterip_config * -__clusterip_config_find(u_int32_t clusterip) +__clusterip_config_find(__be32 clusterip) { struct list_head *pos; - ASSERT_READ_LOCK(&clusterip_lock); list_for_each(pos, &clusterip_configs) { struct clusterip_config *c = list_entry(pos, struct clusterip_config, list); @@ -138,7 +133,7 @@ __clusterip_config_find(u_int32_t clusterip) } static inline struct clusterip_config * -clusterip_config_find_get(u_int32_t clusterip, int entry) +clusterip_config_find_get(__be32 clusterip, int entry) { struct clusterip_config *c; @@ -156,36 +151,49 @@ clusterip_config_find_get(u_int32_t clusterip, int entry) return c; } +static void +clusterip_config_init_nodelist(struct clusterip_config *c, + const struct ipt_clusterip_tgt_info *i) +{ + int n; + + for (n = 0; n < i->num_local_nodes; n++) { + set_bit(i->local_nodes[n] - 1, &c->local_nodes); + } +} + static struct clusterip_config * -clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, +clusterip_config_init(struct ipt_clusterip_tgt_info *i, __be32 ip, struct net_device *dev) { struct clusterip_config *c; - char buffer[16]; - c = kmalloc(sizeof(*c), GFP_ATOMIC); + c = kzalloc(sizeof(*c), GFP_ATOMIC); if (!c) return NULL; - memset(c, 0, sizeof(*c)); c->dev = dev; c->clusterip = ip; memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); c->num_total_nodes = i->num_total_nodes; - c->num_local_nodes = i->num_local_nodes; - memcpy(&c->local_nodes, &i->local_nodes, sizeof(c->local_nodes)); + clusterip_config_init_nodelist(c, i); c->hash_mode = i->hash_mode; c->hash_initval = i->hash_initval; atomic_set(&c->refcount, 1); atomic_set(&c->entries, 1); #ifdef CONFIG_PROC_FS - /* create proc dir entry */ - sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); - c->pde = create_proc_entry(buffer, S_IWUSR|S_IRUSR, clusterip_procdir); - if (!c->pde) { - kfree(c); - return NULL; + { + char buffer[16]; + + /* create proc dir entry */ + sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); + c->pde = create_proc_entry(buffer, S_IWUSR|S_IRUSR, + clusterip_procdir); + if (!c->pde) { + kfree(c); + return NULL; + } } c->pde->proc_fops = &clusterip_proc_fops; c->pde->data = c; @@ -198,58 +206,35 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, return c; } +#ifdef CONFIG_PROC_FS static int clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) { - int i; - - write_lock_bh(&clusterip_lock); - if (c->num_local_nodes >= CLUSTERIP_MAX_NODES - || nodenum > CLUSTERIP_MAX_NODES) { - write_unlock_bh(&clusterip_lock); + if (nodenum == 0 || + nodenum > c->num_total_nodes) return 1; - } - - /* check if we alrady have this number in our array */ - for (i = 0; i < c->num_local_nodes; i++) { - if (c->local_nodes[i] == nodenum) { - write_unlock_bh(&clusterip_lock); - return 1; - } - } - c->local_nodes[c->num_local_nodes++] = nodenum; + /* check if we already have this number in our bitfield */ + if (test_and_set_bit(nodenum - 1, &c->local_nodes)) + return 1; - write_unlock_bh(&clusterip_lock); return 0; } static int clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) { - int i; - - write_lock_bh(&clusterip_lock); - - if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) { - write_unlock_bh(&clusterip_lock); + if (nodenum == 0 || + nodenum > c->num_total_nodes) return 1; - } - for (i = 0; i < c->num_local_nodes; i++) { - if (c->local_nodes[i] == nodenum) { - int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1)); - memmove(&c->local_nodes[i], &c->local_nodes[i+1], size); - c->num_local_nodes--; - write_unlock_bh(&clusterip_lock); - return 0; - } - } + if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) + return 0; - write_unlock_bh(&clusterip_lock); return 1; } +#endif static inline u_int32_t clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) @@ -257,25 +242,18 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) struct iphdr *iph = skb->nh.iph; unsigned long hashval; u_int16_t sport, dport; - struct tcphdr *th; - struct udphdr *uh; - struct icmphdr *ih; + u_int16_t *ports; switch (iph->protocol) { case IPPROTO_TCP: - th = (void *)iph+iph->ihl*4; - sport = ntohs(th->source); - dport = ntohs(th->dest); - break; case IPPROTO_UDP: - uh = (void *)iph+iph->ihl*4; - sport = ntohs(uh->source); - dport = ntohs(uh->dest); - break; + case IPPROTO_UDPLITE: + case IPPROTO_SCTP: + case IPPROTO_DCCP: case IPPROTO_ICMP: - ih = (void *)iph+iph->ihl*4; - sport = ntohs(ih->un.echo.id); - dport = (ih->type<<8)|ih->code; + ports = (void *)iph+iph->ihl*4; + sport = ports[0]; + dport = ports[1]; break; default: if (net_ratelimit()) { @@ -315,25 +293,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) static inline int clusterip_responsible(struct clusterip_config *config, u_int32_t hash) { - int i; - - read_lock_bh(&clusterip_lock); - - if (config->num_local_nodes == 0) { - read_unlock_bh(&clusterip_lock); - return 0; - } - - for (i = 0; i < config->num_local_nodes; i++) { - if (config->local_nodes[i] == hash) { - read_unlock_bh(&clusterip_lock); - return 1; - } - } - - read_unlock_bh(&clusterip_lock); - - return 0; + return test_bit(hash - 1, &config->local_nodes); } /*********************************************************************** @@ -345,19 +305,19 @@ target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, - const void *targinfo, - void *userinfo) + const struct xt_target *target, + const void *targinfo) { const struct ipt_clusterip_tgt_info *cipinfo = targinfo; enum ip_conntrack_info ctinfo; - struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo); - u_int32_t hash; + u_int32_t *mark, hash; /* don't need to clusterip_config_get() here, since refcount * is only decremented by destroy() - and ip_tables guarantees * that the ->target() function isn't called after ->destroy() */ - if (!ct) { + mark = nf_ct_get_mark((*pskb), &ctinfo); + if (mark == NULL) { printk(KERN_ERR "CLUSTERIP: no conntrack!\n"); /* FIXME: need to drop invalid ones, since replies * to outgoing connections of other nodes will be @@ -380,7 +340,7 @@ target(struct sk_buff **pskb, switch (ctinfo) { case IP_CT_NEW: - ct->mark = hash; + *mark = hash; break; case IP_CT_RELATED: case IP_CT_RELATED+IP_CT_IS_REPLY: @@ -397,7 +357,7 @@ target(struct sk_buff **pskb, #ifdef DEBUG_CLUSTERP DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); #endif - DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark); + DEBUGP("hash=%u ct_hash=%u ", hash, *mark); if (!clusterip_responsible(cipinfo->config, hash)) { DEBUGP("not responsible\n"); return NF_DROP; @@ -413,22 +373,16 @@ target(struct sk_buff **pskb, static int checkentry(const char *tablename, - const struct ipt_entry *e, + const void *e_void, + const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct ipt_clusterip_tgt_info *cipinfo = targinfo; + const struct ipt_entry *e = e_void; struct clusterip_config *config; - if (targinfosize != IPT_ALIGN(sizeof(struct ipt_clusterip_tgt_info))) { - printk(KERN_WARNING "CLUSTERIP: targinfosize %u != %Zu\n", - targinfosize, - IPT_ALIGN(sizeof(struct ipt_clusterip_tgt_info))); - return 0; - } - if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP && cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT && cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { @@ -437,7 +391,7 @@ checkentry(const char *tablename, return 0; } - if (e->ip.dmsk.s_addr != 0xffffffff + if (e->ip.dmsk.s_addr != htonl(0xffffffff) || e->ip.dst.s_addr == 0) { printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n"); return 0; @@ -494,27 +448,36 @@ checkentry(const char *tablename, cipinfo->config = config; } + if (nf_ct_l3proto_try_module_get(target->family) < 0) { + printk(KERN_WARNING "can't load conntrack support for " + "proto=%d\n", target->family); + return 0; + } + return 1; } /* drop reference count of cluster config when rule is deleted */ -static void destroy(void *matchinfo, unsigned int matchinfosize) +static void destroy(const struct xt_target *target, void *targinfo) { - struct ipt_clusterip_tgt_info *cipinfo = matchinfo; + struct ipt_clusterip_tgt_info *cipinfo = targinfo; /* if no more entries are referencing the config, remove it * from the list and destroy the proc entry */ clusterip_config_entry_put(cipinfo->config); clusterip_config_put(cipinfo->config); + + nf_ct_l3proto_module_put(target->family); } -static struct ipt_target clusterip_tgt = { - .name = "CLUSTERIP", - .target = &target, - .checkentry = &checkentry, - .destroy = &destroy, - .me = THIS_MODULE +static struct ipt_target clusterip_tgt = { + .name = "CLUSTERIP", + .target = target, + .targetsize = sizeof(struct ipt_clusterip_tgt_info), + .checkentry = checkentry, + .destroy = destroy, + .me = THIS_MODULE }; @@ -525,9 +488,9 @@ static struct ipt_target clusterip_tgt = { /* hardcoded for 48bit ethernet and 32bit ipv4 addresses */ struct arp_payload { u_int8_t src_hw[ETH_ALEN]; - u_int32_t src_ip; + __be32 src_ip; u_int8_t dst_hw[ETH_ALEN]; - u_int32_t dst_ip; + __be32 dst_ip; } __attribute__ ((packed)); #ifdef CLUSTERIP_DEBUG @@ -618,56 +581,69 @@ static struct nf_hook_ops cip_arp_ops = { #ifdef CONFIG_PROC_FS +struct clusterip_seq_position { + unsigned int pos; /* position */ + unsigned int weight; /* number of bits set == size */ + unsigned int bit; /* current bit */ + unsigned long val; /* current value */ +}; + static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) { struct proc_dir_entry *pde = s->private; struct clusterip_config *c = pde->data; - unsigned int *nodeidx; - - read_lock_bh(&clusterip_lock); - if (*pos >= c->num_local_nodes) + unsigned int weight; + u_int32_t local_nodes; + struct clusterip_seq_position *idx; + + /* FIXME: possible race */ + local_nodes = c->local_nodes; + weight = hweight32(local_nodes); + if (*pos >= weight) return NULL; - nodeidx = kmalloc(sizeof(unsigned int), GFP_KERNEL); - if (!nodeidx) + idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL); + if (!idx) return ERR_PTR(-ENOMEM); - *nodeidx = *pos; - return nodeidx; + idx->pos = *pos; + idx->weight = weight; + idx->bit = ffs(local_nodes); + idx->val = local_nodes; + clear_bit(idx->bit - 1, &idx->val); + + return idx; } static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) { - struct proc_dir_entry *pde = s->private; - struct clusterip_config *c = pde->data; - unsigned int *nodeidx = (unsigned int *)v; + struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; - *pos = ++(*nodeidx); - if (*pos >= c->num_local_nodes) { + *pos = ++idx->pos; + if (*pos >= idx->weight) { kfree(v); return NULL; } - return nodeidx; + idx->bit = ffs(idx->val); + clear_bit(idx->bit - 1, &idx->val); + return idx; } static void clusterip_seq_stop(struct seq_file *s, void *v) { kfree(v); - - read_unlock_bh(&clusterip_lock); } static int clusterip_seq_show(struct seq_file *s, void *v) { - struct proc_dir_entry *pde = s->private; - struct clusterip_config *c = pde->data; - unsigned int *nodeidx = (unsigned int *)v; + struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; - if (*nodeidx != 0) + if (idx->pos != 0) seq_putc(s, ','); - seq_printf(s, "%u", c->local_nodes[*nodeidx]); - if (*nodeidx == c->num_local_nodes-1) + seq_printf(s, "%u", idx->bit); + + if (idx->pos == idx->weight - 1) seq_putc(s, '\n'); return 0; @@ -716,7 +692,7 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input, { #define PROC_WRITELEN 10 char buffer[PROC_WRITELEN+1]; - struct proc_dir_entry *pde = PDE(file->f_dentry->d_inode); + struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); struct clusterip_config *c = pde->data; unsigned long nodenum; @@ -748,22 +724,17 @@ static struct file_operations clusterip_proc_fops = { #endif /* CONFIG_PROC_FS */ -static int init_or_cleanup(int fini) +static int __init ipt_clusterip_init(void) { int ret; - if (fini) - goto cleanup; - - if (ipt_register_target(&clusterip_tgt)) { - ret = -EINVAL; - goto cleanup_none; - } + ret = ipt_register_target(&clusterip_tgt); + if (ret < 0) + return ret; - if (nf_register_hook(&cip_arp_ops) < 0) { - ret = -EINVAL; + ret = nf_register_hook(&cip_arp_ops); + if (ret < 0) goto cleanup_target; - } #ifdef CONFIG_PROC_FS clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", proc_net); @@ -776,32 +747,27 @@ static int init_or_cleanup(int fini) printk(KERN_NOTICE "ClusterIP Version %s loaded successfully\n", CLUSTERIP_VERSION); - return 0; -cleanup: - printk(KERN_NOTICE "ClusterIP Version %s unloading\n", - CLUSTERIP_VERSION); #ifdef CONFIG_PROC_FS - remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent); -#endif cleanup_hook: nf_unregister_hook(&cip_arp_ops); +#endif /* CONFIG_PROC_FS */ cleanup_target: ipt_unregister_target(&clusterip_tgt); -cleanup_none: - return -EINVAL; -} - -static int __init init(void) -{ - return init_or_cleanup(0); + return ret; } -static void __exit fini(void) +static void __exit ipt_clusterip_fini(void) { - init_or_cleanup(1); + printk(KERN_NOTICE "ClusterIP Version %s unloading\n", + CLUSTERIP_VERSION); +#ifdef CONFIG_PROC_FS + remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent); +#endif + nf_unregister_hook(&cip_arp_ops); + ipt_unregister_target(&clusterip_tgt); } -module_init(init); -module_exit(fini); +module_init(ipt_clusterip_init); +module_exit(ipt_clusterip_fini);