X-Git-Url: http://ftp.safe.ca/?p=safe%2Fjmp%2Flinux-2.6;a=blobdiff_plain;f=net%2Fipv4%2Fnetfilter%2Fipt_CLUSTERIP.c;h=0886f96c736b12b1ddcc50bbb365a3d4e68d1a0d;hp=4fe28f2644759962e2dbcbd7a18c121e2a19deaf;hb=477781477a88f60c89003c852def4aedc6f78101;hpb=e905a9edab7f4f14f9213b52234e4a346c690911 diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 4fe28f2..0886f96 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -19,31 +19,20 @@ #include #include #include -#include #include - -#include - #include - #include #include #include -#include +#include +#include +#include #define CLUSTERIP_VERSION "0.8" -#define DEBUG_CLUSTERIP - -#ifdef DEBUG_CLUSTERIP -#define DEBUGP printk -#else -#define DEBUGP -#endif - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); -MODULE_DESCRIPTION("iptables target for CLUSTERIP"); +MODULE_DESCRIPTION("Xtables: CLUSTERIP target"); struct clusterip_config { struct list_head list; /* list of all configs */ @@ -70,7 +59,7 @@ static LIST_HEAD(clusterip_configs); static DEFINE_RWLOCK(clusterip_lock); #ifdef CONFIG_PROC_FS -static struct file_operations clusterip_proc_fops; +static const struct file_operations clusterip_proc_fops; static struct proc_dir_entry *clusterip_procdir; #endif @@ -87,21 +76,14 @@ clusterip_config_put(struct clusterip_config *c) kfree(c); } -/* increase the count of entries(rules) using/referencing this config */ -static inline void -clusterip_config_entry_get(struct clusterip_config *c) -{ - atomic_inc(&c->entries); -} - /* decrease the count of entries using/referencing this config. If last * entry(rule) is removed, remove the config from lists, but don't free it * yet, since proc-files could still be holding references */ static inline void clusterip_config_entry_put(struct clusterip_config *c) { + write_lock_bh(&clusterip_lock); if (atomic_dec_and_test(&c->entries)) { - write_lock_bh(&clusterip_lock); list_del(&c->list); write_unlock_bh(&clusterip_lock); @@ -114,20 +96,19 @@ clusterip_config_entry_put(struct clusterip_config *c) #ifdef CONFIG_PROC_FS remove_proc_entry(c->pde->name, c->pde->parent); #endif + return; } + write_unlock_bh(&clusterip_lock); } static struct clusterip_config * __clusterip_config_find(__be32 clusterip) { - struct list_head *pos; + struct clusterip_config *c; - list_for_each(pos, &clusterip_configs) { - struct clusterip_config *c = list_entry(pos, - struct clusterip_config, list); - if (c->clusterip == clusterip) { + list_for_each_entry(c, &clusterip_configs, list) { + if (c->clusterip == clusterip) return c; - } } return NULL; @@ -158,13 +139,12 @@ clusterip_config_init_nodelist(struct clusterip_config *c, { int n; - for (n = 0; n < i->num_local_nodes; n++) { + for (n = 0; n < i->num_local_nodes; n++) set_bit(i->local_nodes[n] - 1, &c->local_nodes); - } } static struct clusterip_config * -clusterip_config_init(struct ipt_clusterip_tgt_info *i, __be32 ip, +clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, struct net_device *dev) { struct clusterip_config *c; @@ -188,16 +168,15 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, __be32 ip, char buffer[16]; /* create proc dir entry */ - sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); - c->pde = create_proc_entry(buffer, S_IWUSR|S_IRUSR, - clusterip_procdir); + sprintf(buffer, "%pI4", &ip); + c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR, + clusterip_procdir, + &clusterip_proc_fops, c); if (!c->pde) { kfree(c); return NULL; } } - c->pde->proc_fops = &clusterip_proc_fops; - c->pde->data = c; #endif write_lock_bh(&clusterip_lock); @@ -223,27 +202,28 @@ clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) return 0; } -static int +static bool clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) { if (nodenum == 0 || nodenum > c->num_total_nodes) - return 1; + return true; if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) - return 0; + return false; - return 1; + return true; } #endif static inline u_int32_t -clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) +clusterip_hashfn(const struct sk_buff *skb, + const struct clusterip_config *config) { - struct iphdr *iph = skb->nh.iph; + const struct iphdr *iph = ip_hdr(skb); unsigned long hashval; u_int16_t sport, dport; - u_int16_t *ports; + const u_int16_t *ports; switch (iph->protocol) { case IPPROTO_TCP: @@ -252,15 +232,14 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) case IPPROTO_SCTP: case IPPROTO_DCCP: case IPPROTO_ICMP: - ports = (void *)iph+iph->ihl*4; + ports = (const void *)iph+iph->ihl*4; sport = ports[0]; dport = ports[1]; break; default: - if (net_ratelimit()) { + if (net_ratelimit()) printk(KERN_NOTICE "CLUSTERIP: unknown protocol `%u'\n", iph->protocol); - } sport = dport = 0; } @@ -288,11 +267,11 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) } /* node numbers are 1..n, not 0..n */ - return ((hashval % config->num_total_nodes)+1); + return (((u64)hashval * config->num_total_nodes) >> 32) + 1; } static inline int -clusterip_responsible(struct clusterip_config *config, u_int32_t hash) +clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) { return test_bit(hash - 1, &config->local_nodes); } @@ -302,23 +281,19 @@ clusterip_responsible(struct clusterip_config *config, u_int32_t hash) ***********************************************************************/ static unsigned int -target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ipt_clusterip_tgt_info *cipinfo = targinfo; + const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; + struct nf_conn *ct; enum ip_conntrack_info ctinfo; - u_int32_t *mark, hash; + u_int32_t hash; /* don't need to clusterip_config_get() here, since refcount * is only decremented by destroy() - and ip_tables guarantees * that the ->target() function isn't called after ->destroy() */ - mark = nf_ct_get_mark((*pskb), &ctinfo); - if (mark == NULL) { + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL) { printk(KERN_ERR "CLUSTERIP: no conntrack!\n"); /* FIXME: need to drop invalid ones, since replies * to outgoing connections of other nodes will be @@ -328,20 +303,20 @@ target(struct sk_buff **pskb, /* special case: ICMP error handling. conntrack distinguishes between * error messages (RELATED) and information requests (see below) */ - if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP - && (ctinfo == IP_CT_RELATED - || ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY)) + if (ip_hdr(skb)->protocol == IPPROTO_ICMP && + (ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)) return XT_CONTINUE; /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here * on, which all have an ID field [relevant for hashing]. */ - hash = clusterip_hashfn(*pskb, cipinfo->config); + hash = clusterip_hashfn(skb, cipinfo->config); switch (ctinfo) { case IP_CT_NEW: - *mark = hash; + ct->mark = hash; break; case IP_CT_RELATED: case IP_CT_RELATED+IP_CT_IS_REPLY: @@ -355,32 +330,27 @@ target(struct sk_buff **pskb, break; } -#ifdef DEBUG_CLUSTERP - DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); +#ifdef DEBUG + nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); #endif - DEBUGP("hash=%u ct_hash=%u ", hash, *mark); + pr_debug("hash=%u ct_hash=%u ", hash, ct->mark); if (!clusterip_responsible(cipinfo->config, hash)) { - DEBUGP("not responsible\n"); + pr_debug("not responsible\n"); return NF_DROP; } - DEBUGP("responsible\n"); + pr_debug("responsible\n"); /* despite being received via linklayer multicast, this is * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */ - (*pskb)->pkt_type = PACKET_HOST; + skb->pkt_type = PACKET_HOST; return XT_CONTINUE; } -static int -checkentry(const char *tablename, - const void *e_void, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool clusterip_tg_check(const struct xt_tgchk_param *par) { - struct ipt_clusterip_tgt_info *cipinfo = targinfo; - const struct ipt_entry *e = e_void; + struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; + const struct ipt_entry *e = par->entryinfo; struct clusterip_config *config; @@ -389,52 +359,34 @@ checkentry(const char *tablename, cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { printk(KERN_WARNING "CLUSTERIP: unknown mode `%u'\n", cipinfo->hash_mode); - return 0; + return false; } - if (e->ip.dmsk.s_addr != htonl(0xffffffff) - || e->ip.dst.s_addr == 0) { + if (e->ip.dmsk.s_addr != htonl(0xffffffff) || + e->ip.dst.s_addr == 0) { printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n"); - return 0; + return false; } /* FIXME: further sanity checks */ config = clusterip_config_find_get(e->ip.dst.s_addr, 1); - if (config) { - if (cipinfo->config != NULL) { - /* Case A: This is an entry that gets reloaded, since - * it still has a cipinfo->config pointer. Simply - * increase the entry refcount and return */ - if (cipinfo->config != config) { - printk(KERN_ERR "CLUSTERIP: Reloaded entry " - "has invalid config pointer!\n"); - return 0; - } - clusterip_config_entry_get(cipinfo->config); - } else { - /* Case B: This is a new rule referring to an existing - * clusterip config. */ - cipinfo->config = config; - clusterip_config_entry_get(cipinfo->config); - } - } else { - /* Case C: This is a completely new clusterip config */ + if (!config) { if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { - printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); - return 0; + printk(KERN_WARNING "CLUSTERIP: no config found for %pI4, need 'new'\n", &e->ip.dst.s_addr); + return false; } else { struct net_device *dev; if (e->ip.iniface[0] == '\0') { printk(KERN_WARNING "CLUSTERIP: Please specify an interface name\n"); - return 0; + return false; } - dev = dev_get_by_name(e->ip.iniface); + dev = dev_get_by_name(&init_net, e->ip.iniface); if (!dev) { printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface); - return 0; + return false; } config = clusterip_config_init(cipinfo, @@ -442,26 +394,26 @@ checkentry(const char *tablename, if (!config) { printk(KERN_WARNING "CLUSTERIP: cannot allocate config\n"); dev_put(dev); - return 0; + return false; } dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); } - cipinfo->config = config; } + cipinfo->config = config; - if (nf_ct_l3proto_try_module_get(target->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->target->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " - "proto=%d\n", target->family); - return 0; + "proto=%u\n", par->target->family); + return false; } - return 1; + return true; } /* drop reference count of cluster config when rule is deleted */ -static void destroy(const struct xt_target *target, void *targinfo) +static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) { - struct ipt_clusterip_tgt_info *cipinfo = targinfo; + const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; /* if no more entries are referencing the config, remove it * from the list and destroy the proc entry */ @@ -469,16 +421,33 @@ static void destroy(const struct xt_target *target, void *targinfo) clusterip_config_put(cipinfo->config); - nf_ct_l3proto_module_put(target->family); + nf_ct_l3proto_module_put(par->target->family); } -static struct xt_target clusterip_tgt = { +#ifdef CONFIG_COMPAT +struct compat_ipt_clusterip_tgt_info +{ + u_int32_t flags; + u_int8_t clustermac[6]; + u_int16_t num_total_nodes; + u_int16_t num_local_nodes; + u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; + u_int32_t hash_mode; + u_int32_t hash_initval; + compat_uptr_t config; +}; +#endif /* CONFIG_COMPAT */ + +static struct xt_target clusterip_tg_reg __read_mostly = { .name = "CLUSTERIP", - .family = AF_INET, - .target = target, + .family = NFPROTO_IPV4, + .target = clusterip_tg, + .checkentry = clusterip_tg_check, + .destroy = clusterip_tg_destroy, .targetsize = sizeof(struct ipt_clusterip_tgt_info), - .checkentry = checkentry, - .destroy = destroy, +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_ipt_clusterip_tgt_info), +#endif /* CONFIG_COMPAT */ .me = THIS_MODULE }; @@ -495,47 +464,45 @@ struct arp_payload { __be32 dst_ip; } __attribute__ ((packed)); -#ifdef CLUSTERIP_DEBUG +#ifdef DEBUG static void arp_print(struct arp_payload *payload) { #define HBUFFERLEN 30 char hbuffer[HBUFFERLEN]; int j,k; - const char hexbuf[]= "0123456789abcdef"; for (k=0, j=0; k < HBUFFERLEN-3 && j < ETH_ALEN; j++) { - hbuffer[k++]=hexbuf[(payload->src_hw[j]>>4)&15]; - hbuffer[k++]=hexbuf[payload->src_hw[j]&15]; + hbuffer[k++] = hex_asc_hi(payload->src_hw[j]); + hbuffer[k++] = hex_asc_lo(payload->src_hw[j]); hbuffer[k++]=':'; } hbuffer[--k]='\0'; - printk("src %u.%u.%u.%u@%s, dst %u.%u.%u.%u\n", - NIPQUAD(payload->src_ip), hbuffer, - NIPQUAD(payload->dst_ip)); + printk("src %pI4@%s, dst %pI4\n", + &payload->src_ip, hbuffer, &payload->dst_ip); } #endif static unsigned int arp_mangle(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct arphdr *arp = (*pskb)->nh.arph; + struct arphdr *arp = arp_hdr(skb); struct arp_payload *payload; struct clusterip_config *c; /* we don't care about non-ethernet and non-ipv4 ARP */ - if (arp->ar_hrd != htons(ARPHRD_ETHER) - || arp->ar_pro != htons(ETH_P_IP) - || arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN) + if (arp->ar_hrd != htons(ARPHRD_ETHER) || + arp->ar_pro != htons(ETH_P_IP) || + arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN) return NF_ACCEPT; /* we only want to mangle arp requests and replies */ - if (arp->ar_op != htons(ARPOP_REPLY) - && arp->ar_op != htons(ARPOP_REQUEST)) + if (arp->ar_op != htons(ARPOP_REPLY) && + arp->ar_op != htons(ARPOP_REQUEST)) return NF_ACCEPT; payload = (void *)(arp+1); @@ -551,8 +518,9 @@ arp_mangle(unsigned int hook, * this wouldn't work, since we didn't subscribe the mcast group on * other interfaces */ if (c->dev != out) { - DEBUGP("CLUSTERIP: not mangling arp reply on different " - "interface: cip'%s'-skb'%s'\n", c->dev->name, out->name); + pr_debug("CLUSTERIP: not mangling arp reply on different " + "interface: cip'%s'-skb'%s'\n", + c->dev->name, out->name); clusterip_config_put(c); return NF_ACCEPT; } @@ -560,8 +528,8 @@ arp_mangle(unsigned int hook, /* mangle reply hardware address */ memcpy(payload->src_hw, c->clustermac, arp->ar_hln); -#ifdef CLUSTERIP_DEBUG - DEBUGP(KERN_DEBUG "CLUSTERIP mangled arp reply: "); +#ifdef DEBUG + pr_debug(KERN_DEBUG "CLUSTERIP mangled arp reply: "); arp_print(payload); #endif @@ -570,9 +538,9 @@ arp_mangle(unsigned int hook, return NF_ACCEPT; } -static struct nf_hook_ops cip_arp_ops = { +static struct nf_hook_ops cip_arp_ops __read_mostly = { .hook = arp_mangle, - .pf = NF_ARP, + .pf = NFPROTO_ARP, .hooknum = NF_ARP_OUT, .priority = -1 }; @@ -592,8 +560,7 @@ struct clusterip_seq_position { static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) { - struct proc_dir_entry *pde = s->private; - struct clusterip_config *c = pde->data; + struct clusterip_config *c = s->private; unsigned int weight; u_int32_t local_nodes; struct clusterip_seq_position *idx; @@ -619,7 +586,7 @@ static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) { - struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; + struct clusterip_seq_position *idx = v; *pos = ++idx->pos; if (*pos >= idx->weight) { @@ -638,7 +605,7 @@ static void clusterip_seq_stop(struct seq_file *s, void *v) static int clusterip_seq_show(struct seq_file *s, void *v) { - struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; + struct clusterip_seq_position *idx = v; if (idx->pos != 0) seq_putc(s, ','); @@ -651,7 +618,7 @@ static int clusterip_seq_show(struct seq_file *s, void *v) return 0; } -static struct seq_operations clusterip_seq_ops = { +static const struct seq_operations clusterip_seq_ops = { .start = clusterip_seq_start, .next = clusterip_seq_next, .stop = clusterip_seq_stop, @@ -664,10 +631,9 @@ static int clusterip_proc_open(struct inode *inode, struct file *file) if (!ret) { struct seq_file *sf = file->private_data; - struct proc_dir_entry *pde = PDE(inode); - struct clusterip_config *c = pde->data; + struct clusterip_config *c = PDE(inode)->data; - sf->private = pde; + sf->private = c; clusterip_config_get(c); } @@ -677,8 +643,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file) static int clusterip_proc_release(struct inode *inode, struct file *file) { - struct proc_dir_entry *pde = PDE(inode); - struct clusterip_config *c = pde->data; + struct clusterip_config *c = PDE(inode)->data; int ret; ret = seq_release(inode, file); @@ -692,10 +657,9 @@ static int clusterip_proc_release(struct inode *inode, struct file *file) static ssize_t clusterip_proc_write(struct file *file, const char __user *input, size_t size, loff_t *ofs) { + struct clusterip_config *c = PDE(file->f_path.dentry->d_inode)->data; #define PROC_WRITELEN 10 char buffer[PROC_WRITELEN+1]; - struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); - struct clusterip_config *c = pde->data; unsigned long nodenum; if (copy_from_user(buffer, input, PROC_WRITELEN)) @@ -715,7 +679,7 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input, return size; } -static struct file_operations clusterip_proc_fops = { +static const struct file_operations clusterip_proc_fops = { .owner = THIS_MODULE, .open = clusterip_proc_open, .read = seq_read, @@ -726,11 +690,11 @@ static struct file_operations clusterip_proc_fops = { #endif /* CONFIG_PROC_FS */ -static int __init ipt_clusterip_init(void) +static int __init clusterip_tg_init(void) { int ret; - ret = xt_register_target(&clusterip_tgt); + ret = xt_register_target(&clusterip_tg_reg); if (ret < 0) return ret; @@ -739,7 +703,7 @@ static int __init ipt_clusterip_init(void) goto cleanup_target; #ifdef CONFIG_PROC_FS - clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", proc_net); + clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net); if (!clusterip_procdir) { printk(KERN_ERR "CLUSTERIP: Unable to proc dir entry\n"); ret = -ENOMEM; @@ -756,11 +720,11 @@ cleanup_hook: nf_unregister_hook(&cip_arp_ops); #endif /* CONFIG_PROC_FS */ cleanup_target: - xt_unregister_target(&clusterip_tgt); + xt_unregister_target(&clusterip_tg_reg); return ret; } -static void __exit ipt_clusterip_fini(void) +static void __exit clusterip_tg_exit(void) { printk(KERN_NOTICE "ClusterIP Version %s unloading\n", CLUSTERIP_VERSION); @@ -768,8 +732,8 @@ static void __exit ipt_clusterip_fini(void) remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent); #endif nf_unregister_hook(&cip_arp_ops); - xt_unregister_target(&clusterip_tgt); + xt_unregister_target(&clusterip_tg_reg); } -module_init(ipt_clusterip_init); -module_exit(ipt_clusterip_fini); +module_init(clusterip_tg_init); +module_exit(clusterip_tg_exit);