IPVS: Add and bind IPv6 xmit functions
[safe/jmp/linux-2.6] / net / ipv4 / ipvs / ip_vs_conn.c
index 6feeb1f..f5dddad 100644 (file)
@@ -5,8 +5,6 @@
  *              high-performance and highly available server based on a
  *              cluster of servers.
  *
- * Version:     $Id: ip_vs_conn.c,v 1.31 2003/04/18 09:03:16 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *              Peter Kese <peter.kese@ijs.si>
  *              Julian Anastasov <ja@ssi.bg>
@@ -35,6 +33,7 @@
 #include <linux/jhash.h>
 #include <linux/random.h>
 
+#include <net/net_namespace.h>
 #include <net/ip_vs.h>
 
 
@@ -115,9 +114,18 @@ static inline void ct_write_unlock_bh(unsigned key)
 /*
  *     Returns hash value for IPVS connection entry
  */
-static unsigned int ip_vs_conn_hashkey(unsigned proto, __be32 addr, __be16 port)
+static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
+                                      const union nf_inet_addr *addr,
+                                      __be16 port)
 {
-       return jhash_3words((__force u32)addr, (__force u32)port, proto, ip_vs_conn_rnd)
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6)     /* fold the 16 address bytes into one word */
+               return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
+                                   (__force u32)port, proto, ip_vs_conn_rnd)
+                       & IP_VS_CONN_TAB_MASK;
+#endif /* CONFIG_IP_VS_IPV6; every other af hashes only addr->ip below */
+       return jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
+                           ip_vs_conn_rnd)
                 & IP_VS_CONN_TAB_MASK;
 }
 
@@ -132,7 +140,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
        int ret;
 
        /* Hash by protocol, client address and port */
-       hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport);
+       hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
 
        ct_write_lock(hash);
 
@@ -163,7 +171,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
        int ret;
 
        /* unhash it and decrease its reference counter */
-       hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport);
+       hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
 
        ct_write_lock(hash);
 
@@ -188,20 +196,23 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
  *     d_addr, d_port: pkt dest address (load balancer)
  */
 static inline struct ip_vs_conn *__ip_vs_conn_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
 {
        unsigned hash;
        struct ip_vs_conn *cp;
 
-       hash = ip_vs_conn_hashkey(protocol, s_addr, s_port);
+       hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port);
 
        ct_read_lock(hash);
 
        list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
-               if (s_addr==cp->caddr && s_port==cp->cport &&
-                   d_port==cp->vport && d_addr==cp->vaddr &&
+               if (cp->af == af &&
+                   ip_vs_addr_equal(af, s_addr, &cp->caddr) &&
+                   ip_vs_addr_equal(af, d_addr, &cp->vaddr) &&
+                   s_port == cp->cport && d_port == cp->vport &&
                    ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
-                   protocol==cp->protocol) {
+                   protocol == cp->protocol) {
                        /* HIT */
                        atomic_inc(&cp->refcnt);
                        ct_read_unlock(hash);
@@ -215,39 +226,44 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get
 }
 
 struct ip_vs_conn *ip_vs_conn_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
 {
        struct ip_vs_conn *cp;
 
-       cp = __ip_vs_conn_in_get(protocol, s_addr, s_port, d_addr, d_port);
+       cp = __ip_vs_conn_in_get(af, protocol, s_addr, s_port, d_addr, d_port);
        if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt))
-               cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port);
+               cp = __ip_vs_conn_in_get(af, protocol, s_addr, 0, d_addr,
+                                        d_port); /* retry as client port 0 */
 
-       IP_VS_DBG(9, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
-                 ip_vs_proto_name(protocol),
-                 NIPQUAD(s_addr), ntohs(s_port),
-                 NIPQUAD(d_addr), ntohs(d_port),
-                 cp?"hit":"not hit");
+       IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n",
+                     ip_vs_proto_name(protocol),
+                     IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+                     IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+                     cp ? "hit" : "not hit");
 
        return cp;
 }
 
 /* Get reference to connection template */
 struct ip_vs_conn *ip_vs_ct_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
 {
        unsigned hash;
        struct ip_vs_conn *cp;
 
-       hash = ip_vs_conn_hashkey(protocol, s_addr, s_port);
+       hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port);
 
        ct_read_lock(hash);
 
        list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
-               if (s_addr==cp->caddr && s_port==cp->cport &&
-                   d_port==cp->vport && d_addr==cp->vaddr &&
+               if (cp->af == af &&
+                   ip_vs_addr_equal(af, s_addr, &cp->caddr) &&
+                   ip_vs_addr_equal(af, d_addr, &cp->vaddr) &&
+                   s_port == cp->cport && d_port == cp->vport &&
                    cp->flags & IP_VS_CONN_F_TEMPLATE &&
-                   protocol==cp->protocol) {
+                   protocol == cp->protocol) {
                        /* HIT */
                        atomic_inc(&cp->refcnt);
                        goto out;
@@ -258,11 +274,11 @@ struct ip_vs_conn *ip_vs_ct_in_get
   out:
        ct_read_unlock(hash);
 
-       IP_VS_DBG(9, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
-                 ip_vs_proto_name(protocol),
-                 NIPQUAD(s_addr), ntohs(s_port),
-                 NIPQUAD(d_addr), ntohs(d_port),
-                 cp?"hit":"not hit");
+       IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n",
+                     ip_vs_proto_name(protocol),
+                     IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+                     IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+                     cp ? "hit" : "not hit");
 
        return cp;
 }
@@ -274,7 +290,8 @@ struct ip_vs_conn *ip_vs_ct_in_get
  *     d_addr, d_port: pkt dest address (foreign host)
  */
 struct ip_vs_conn *ip_vs_conn_out_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
 {
        unsigned hash;
        struct ip_vs_conn *cp, *ret=NULL;
@@ -282,13 +299,15 @@ struct ip_vs_conn *ip_vs_conn_out_get
        /*
         *      Check for "full" addressed entries
         */
-       hash = ip_vs_conn_hashkey(protocol, d_addr, d_port);
+       hash = ip_vs_conn_hashkey(af, protocol, d_addr, d_port);
 
        ct_read_lock(hash);
 
        list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
-               if (d_addr == cp->caddr && d_port == cp->cport &&
-                   s_port == cp->dport && s_addr == cp->daddr &&
+               if (cp->af == af &&
+                   ip_vs_addr_equal(af, d_addr, &cp->caddr) &&
+                   ip_vs_addr_equal(af, s_addr, &cp->daddr) &&
+                   d_port == cp->cport && s_port == cp->dport &&
                    protocol == cp->protocol) {
                        /* HIT */
                        atomic_inc(&cp->refcnt);
@@ -299,11 +318,11 @@ struct ip_vs_conn *ip_vs_conn_out_get
 
        ct_read_unlock(hash);
 
-       IP_VS_DBG(9, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
-                 ip_vs_proto_name(protocol),
-                 NIPQUAD(s_addr), ntohs(s_port),
-                 NIPQUAD(d_addr), ntohs(d_port),
-                 ret?"hit":"not hit");
+       IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n",
+                     ip_vs_proto_name(protocol),
+                     IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+                     IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+                     ret ? "hit" : "not hit");
 
        return ret;
 }
@@ -370,6 +389,33 @@ static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
        }
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+static inline void ip_vs_bind_xmit_v6(struct ip_vs_conn *cp)
+{
+       switch (IP_VS_FWD_METHOD(cp)) { /* pick cp->packet_xmit by fwd method */
+       case IP_VS_CONN_F_MASQ:
+               cp->packet_xmit = ip_vs_nat_xmit_v6;
+               break;
+
+       case IP_VS_CONN_F_TUNNEL:
+               cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+               break;
+
+       case IP_VS_CONN_F_DROUTE:
+               cp->packet_xmit = ip_vs_dr_xmit_v6;
+               break;
+
+       case IP_VS_CONN_F_LOCALNODE:
+               cp->packet_xmit = ip_vs_null_xmit;      /* no _v6 variant here */
+               break;
+
+       case IP_VS_CONN_F_BYPASS:
+               cp->packet_xmit = ip_vs_bypass_xmit_v6;
+               break;
+       }       /* no default: any other method leaves packet_xmit untouched */
+}
+#endif /* CONFIG_IP_VS_IPV6 */
+
 
 static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest)
 {
@@ -392,16 +438,24 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
        atomic_inc(&dest->refcnt);
 
        /* Bind with the destination and its corresponding transmitter */
-       cp->flags |= atomic_read(&dest->conn_flags);
+       if ((cp->flags & IP_VS_CONN_F_SYNC) &&
+           (!(cp->flags & IP_VS_CONN_F_TEMPLATE)))
+               /* A non-template connection created by sync keeps its own
+                * activity state: do not inherit INACTIVE from dest.
+                */
+               cp->flags |= atomic_read(&dest->conn_flags) &
+                            (~IP_VS_CONN_F_INACTIVE);
+       else
+               cp->flags |= atomic_read(&dest->conn_flags);
        cp->dest = dest;
 
        IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
                  "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
                  "dest->refcnt:%d\n",
                  ip_vs_proto_name(cp->protocol),
-                 NIPQUAD(cp->caddr), ntohs(cp->cport),
-                 NIPQUAD(cp->vaddr), ntohs(cp->vport),
-                 NIPQUAD(cp->daddr), ntohs(cp->dport),
+                 NIPQUAD(cp->caddr.ip), ntohs(cp->cport),
+                 NIPQUAD(cp->vaddr.ip), ntohs(cp->vport),
+                 NIPQUAD(cp->daddr.ip), ntohs(cp->dport),
                  ip_vs_fwd_tag(cp), cp->state,
                  cp->flags, atomic_read(&cp->refcnt),
                  atomic_read(&dest->refcnt));
@@ -411,7 +465,11 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
                /* It is a normal connection, so increase the inactive
                   connection counter because it is in TCP SYNRECV
                   state (inactive) or other protocol inacive state */
-               atomic_inc(&dest->inactconns);
+               if ((cp->flags & IP_VS_CONN_F_SYNC) &&
+                   (!(cp->flags & IP_VS_CONN_F_INACTIVE)))
+                       atomic_inc(&dest->activeconns);
+               else
+                       atomic_inc(&dest->inactconns);
        } else {
                /* It is a persistent connection/template, so increase
                   the peristent connection counter */
@@ -425,6 +483,24 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
 
 
 /*
+ * Check if there is a destination for the connection, if so
+ * bind the connection to the destination.
+ */
+struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
+{
+       struct ip_vs_dest *dest;
+
+       if ((cp) && (!cp->dest)) {
+               dest = ip_vs_find_dest(cp->daddr.ip, cp->dport,
+                                      cp->vaddr.ip, cp->vport, cp->protocol);
+               ip_vs_bind_dest(cp, dest);
+               return dest;
+       } else
+               return NULL;
+}
+
+
+/*
  *     Unbind a connection entry with its VS destination
  *     Called by the ip_vs_conn_expire function.
  */
@@ -439,9 +515,9 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
                  "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
                  "dest->refcnt:%d\n",
                  ip_vs_proto_name(cp->protocol),
-                 NIPQUAD(cp->caddr), ntohs(cp->cport),
-                 NIPQUAD(cp->vaddr), ntohs(cp->vport),
-                 NIPQUAD(cp->daddr), ntohs(cp->dport),
+                 NIPQUAD(cp->caddr.ip), ntohs(cp->cport),
+                 NIPQUAD(cp->vaddr.ip), ntohs(cp->vport),
+                 NIPQUAD(cp->daddr.ip), ntohs(cp->dport),
                  ip_vs_fwd_tag(cp), cp->state,
                  cp->flags, atomic_read(&cp->refcnt),
                  atomic_read(&dest->refcnt));
@@ -501,9 +577,9 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
                          "protocol %s s:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
                          "-> d:%u.%u.%u.%u:%d\n",
                          ip_vs_proto_name(ct->protocol),
-                         NIPQUAD(ct->caddr), ntohs(ct->cport),
-                         NIPQUAD(ct->vaddr), ntohs(ct->vport),
-                         NIPQUAD(ct->daddr), ntohs(ct->dport));
+                         NIPQUAD(ct->caddr.ip), ntohs(ct->cport),
+                         NIPQUAD(ct->vaddr.ip), ntohs(ct->vport),
+                         NIPQUAD(ct->daddr.ip), ntohs(ct->dport));
 
                /*
                 * Invalidate the connection template
@@ -596,30 +672,29 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
  *     Create a new connection entry and hash it into the ip_vs_conn_tab
  */
 struct ip_vs_conn *
-ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport,
-              __be32 daddr, __be16 dport, unsigned flags,
+ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
+              const union nf_inet_addr *vaddr, __be16 vport,
+              const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
               struct ip_vs_dest *dest)
 {
        struct ip_vs_conn *cp;
        struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
 
-       cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
+       cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
        if (cp == NULL) {
                IP_VS_ERR_RL("ip_vs_conn_new: no memory available.\n");
                return NULL;
        }
 
-       memset(cp, 0, sizeof(*cp));
        INIT_LIST_HEAD(&cp->c_list);
-       init_timer(&cp->timer);
-       cp->timer.data     = (unsigned long)cp;
-       cp->timer.function = ip_vs_conn_expire;
+       setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
+       cp->af             = af;
        cp->protocol       = proto;
-       cp->caddr          = caddr;
+       ip_vs_addr_copy(af, &cp->caddr, caddr);
        cp->cport          = cport;
-       cp->vaddr          = vaddr;
+       ip_vs_addr_copy(af, &cp->vaddr, vaddr);
        cp->vport          = vport;
-       cp->daddr          = daddr;
+       ip_vs_addr_copy(af, &cp->daddr, daddr);
        cp->dport          = dport;
        cp->flags          = flags;
        spin_lock_init(&cp->lock);
@@ -646,7 +721,12 @@ ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport
        cp->timeout = 3*HZ;
 
        /* Bind its packet transmitter */
-       ip_vs_bind_xmit(cp);
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6)
+               ip_vs_bind_xmit_v6(cp);
+       else
+#endif
+               ip_vs_bind_xmit(cp);
 
        if (unlikely(pp && atomic_read(&pp->appcnt)))
                ip_vs_bind_app(cp, pp);
@@ -737,16 +817,16 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
                seq_printf(seq,
                        "%-3s %08X %04X %08X %04X %08X %04X %-11s %7lu\n",
                                ip_vs_proto_name(cp->protocol),
-                               ntohl(cp->caddr), ntohs(cp->cport),
-                               ntohl(cp->vaddr), ntohs(cp->vport),
-                               ntohl(cp->daddr), ntohs(cp->dport),
+                               ntohl(cp->caddr.ip), ntohs(cp->cport),
+                               ntohl(cp->vaddr.ip), ntohs(cp->vport),
+                               ntohl(cp->daddr.ip), ntohs(cp->dport),
                                ip_vs_state_name(cp->protocol, cp->state),
                                (cp->timer.expires-jiffies)/HZ);
        }
        return 0;
 }
 
-static struct seq_operations ip_vs_conn_seq_ops = {
+static const struct seq_operations ip_vs_conn_seq_ops = {
        .start = ip_vs_conn_seq_start,
        .next  = ip_vs_conn_seq_next,
        .stop  = ip_vs_conn_seq_stop,
@@ -758,13 +838,64 @@ static int ip_vs_conn_open(struct inode *inode, struct file *file)
        return seq_open(file, &ip_vs_conn_seq_ops);
 }
 
-static struct file_operations ip_vs_conn_fops = {
+static const struct file_operations ip_vs_conn_fops = {
        .owner   = THIS_MODULE,
        .open    = ip_vs_conn_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
 };
+
+/* Origin column text: "SYNC" when IP_VS_CONN_F_SYNC is set, else "LOCAL". */
+static const char *ip_vs_origin_name(unsigned flags)
+{
+       const int from_sync = (flags & IP_VS_CONN_F_SYNC) != 0;
+
+       return from_sync ? "SYNC" : "LOCAL";
+}
+
+static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
+{
+       /* Start token: print the column header.  Otherwise: one entry row. */
+       if (v == SEQ_START_TOKEN)
+               seq_puts(seq,
+   "Pro FromIP   FPrt ToIP     TPrt DestIP   DPrt State       Origin Expires\n");
+       else {
+               const struct ip_vs_conn *cp = v;
+               /* addresses are the .ip member, shown as host-order hex */
+               seq_printf(seq,
+                       "%-3s %08X %04X %08X %04X %08X %04X %-11s %-6s %7lu\n",
+                               ip_vs_proto_name(cp->protocol),
+                               ntohl(cp->caddr.ip), ntohs(cp->cport),
+                               ntohl(cp->vaddr.ip), ntohs(cp->vport),
+                               ntohl(cp->daddr.ip), ntohs(cp->dport),
+                               ip_vs_state_name(cp->protocol, cp->state),
+                               ip_vs_origin_name(cp->flags),
+                               (cp->timer.expires-jiffies)/HZ);
+       }
+       return 0;
+}
+
+static const struct seq_operations ip_vs_conn_sync_seq_ops = {
+       .start = ip_vs_conn_seq_start,  /* start/next/stop: same iterators */
+       .next  = ip_vs_conn_seq_next,   /* that ip_vs_conn_seq_ops uses    */
+       .stop  = ip_vs_conn_seq_stop,
+       .show  = ip_vs_conn_sync_seq_show,      /* row format with Origin column */
+};
+
+static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &ip_vs_conn_sync_seq_ops);        /* inode unused */
+}
+
+static const struct file_operations ip_vs_conn_sync_fops = {
+       .owner   = THIS_MODULE,
+       .open    = ip_vs_conn_sync_open,        /* only file-specific handler */
+       .read    = seq_read,            /* read/llseek/release: seq_file helpers */
+       .llseek  = seq_lseek,
+       .release = seq_release,
+};
+
 #endif
 
 
@@ -888,7 +1019,7 @@ static void ip_vs_conn_flush(void)
 }
 
 
-int ip_vs_conn_init(void)
+int __init ip_vs_conn_init(void)
 {
        int idx;
 
@@ -902,7 +1033,7 @@ int ip_vs_conn_init(void)
        /* Allocate ip_vs_conn slab cache */
        ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
                                              sizeof(struct ip_vs_conn), 0,
-                                             SLAB_HWCACHE_ALIGN, NULL, NULL);
+                                             SLAB_HWCACHE_ALIGN, NULL);
        if (!ip_vs_conn_cachep) {
                vfree(ip_vs_conn_tab);
                return -ENOMEM;
@@ -923,7 +1054,8 @@ int ip_vs_conn_init(void)
                rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
        }
 
-       proc_net_fops_create("ip_vs_conn", 0, &ip_vs_conn_fops);
+       proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
+       proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
 
        /* calculate the random value for connection hash */
        get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
@@ -939,6 +1071,7 @@ void ip_vs_conn_cleanup(void)
 
        /* Release the empty cache */
        kmem_cache_destroy(ip_vs_conn_cachep);
-       proc_net_remove("ip_vs_conn");
+       proc_net_remove(&init_net, "ip_vs_conn");
+       proc_net_remove(&init_net, "ip_vs_conn_sync");
        vfree(ip_vs_conn_tab);
 }