Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6 into lvs-next-2.6
author Simon Horman <horms@verge.net.au>
Tue, 9 Sep 2008 23:14:52 +0000 (09:14 +1000)
committer Simon Horman <horms@verge.net.au>
Tue, 9 Sep 2008 23:14:52 +0000 (09:14 +1000)
23 files changed:
include/net/ip_vs.h
net/ipv4/ipvs/Kconfig
net/ipv4/ipvs/ip_vs_conn.c
net/ipv4/ipvs/ip_vs_core.c
net/ipv4/ipvs/ip_vs_ctl.c
net/ipv4/ipvs/ip_vs_dh.c
net/ipv4/ipvs/ip_vs_est.c
net/ipv4/ipvs/ip_vs_ftp.c
net/ipv4/ipvs/ip_vs_lblc.c
net/ipv4/ipvs/ip_vs_lblcr.c
net/ipv4/ipvs/ip_vs_lc.c
net/ipv4/ipvs/ip_vs_nq.c
net/ipv4/ipvs/ip_vs_proto.c
net/ipv4/ipvs/ip_vs_proto_ah_esp.c
net/ipv4/ipvs/ip_vs_proto_tcp.c
net/ipv4/ipvs/ip_vs_proto_udp.c
net/ipv4/ipvs/ip_vs_rr.c
net/ipv4/ipvs/ip_vs_sed.c
net/ipv4/ipvs/ip_vs_sh.c
net/ipv4/ipvs/ip_vs_sync.c
net/ipv4/ipvs/ip_vs_wlc.c
net/ipv4/ipvs/ip_vs_wrr.c
net/ipv4/ipvs/ip_vs_xmit.c

index a25ad24..33e2ac6 100644 (file)
 #include <linux/timer.h>
 
 #include <net/checksum.h>
+#include <linux/netfilter.h>           /* for union nf_inet_addr */
+#include <linux/ipv6.h>                        /* for struct ipv6hdr */
+#include <net/ipv6.h>                  /* for ipv6_addr_copy */
+
+struct ip_vs_iphdr {   /* address-family-neutral summary of an IPv4 or IPv6 header */
+       int len;        /* network header length in bytes */
+       __u8 protocol;  /* IPv4 protocol field, or IPv6 nexthdr */
+       union nf_inet_addr saddr;       /* source address */
+       union nf_inet_addr daddr;       /* destination address */
+};
+
+static inline void     /* fill *iphdr from the IPv4 or IPv6 header at nh, selected by af */
+ip_vs_fill_iphdr(int af, const void *nh, struct ip_vs_iphdr *iphdr)
+{
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6) {
+               const struct ipv6hdr *iph = nh;
+               iphdr->len = sizeof(struct ipv6hdr);    /* constant: extension headers are not walked here */
+               iphdr->protocol = iph->nexthdr;
+               ipv6_addr_copy(&iphdr->saddr.in6, &iph->saddr);
+               ipv6_addr_copy(&iphdr->daddr.in6, &iph->daddr);
+       } else
+#endif
+       {       /* any other af, or IPv6 support compiled out: nh is read as a struct iphdr */
+               const struct iphdr *iph = nh;
+               iphdr->len = iph->ihl * 4;      /* ihl is in 32-bit words, hence * 4 for bytes */
+               iphdr->protocol = iph->protocol;
+               iphdr->saddr.ip = iph->saddr;   /* only the .ip member of the union is written */
+               iphdr->daddr.ip = iph->daddr;
+       }
+}
+
+static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst,
+                                  const union nf_inet_addr *src)
+{      /* copy *src into *dst: the .in6 member for AF_INET6, otherwise only .ip */
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6)
+               ipv6_addr_copy(&dst->in6, &src->in6);
+       else
+#endif
+       dst->ip = src->ip;      /* body of the else above when IPv6 is enabled; sole statement otherwise */
+}
+
+static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a,
+                                  const union nf_inet_addr *b)
+{      /* nonzero when *a and *b hold the same address for family af */
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6)
+               return ipv6_addr_equal(&a->in6, &b->in6);
+#endif
+       return a->ip == b->ip;  /* every other af compares only the .ip member */
+}
 
 #ifdef CONFIG_IP_VS_DEBUG
 #include <linux/net.h>
 
 extern int ip_vs_get_debug_level(void);
+
+static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
+                                        const union nf_inet_addr *addr,
+                                        int *idx)
+{      /* format addr as text at buf[*idx], advance *idx past it, return the start of that text */
+       int len;        /* snprintf result plus 1 for the terminating NUL */
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6)
+               len = snprintf(&buf[*idx], buf_len - *idx, "[" NIP6_FMT "]",
+                              NIP6(addr->in6)) + 1;
+       else
+#endif
+               len = snprintf(&buf[*idx], buf_len - *idx, NIPQUAD_FMT,
+                              NIPQUAD(addr->ip)) + 1;
+
+       *idx += len;    /* the next call appends after this string, so one buffer can hold several addresses */
+       BUG_ON(*idx > buf_len + 1);     /* NOTE(review): presumably fires when the formatted text did not fit - confirm snprintf truncation semantics */
+       return &buf[*idx - len];        /* start of the string written by this call */
+}
+
+#define IP_VS_DBG_BUF(level, msg...)                   \
+    do {                                               \
+           char ip_vs_dbg_buf[160];                    \
+           int ip_vs_dbg_idx = 0;                      \
+           if (level <= ip_vs_get_debug_level())       \
+                   printk(KERN_DEBUG "IPVS: " msg);    \
+    } while (0)        /* ip_vs_dbg_buf and ip_vs_dbg_idx are scratch space for IP_VS_DBG_ADDR() uses inside msg */
+#define IP_VS_ERR_BUF(msg...)                          \
+    do {                                               \
+           char ip_vs_dbg_buf[160];                    \
+           int ip_vs_dbg_idx = 0;                      \
+           printk(KERN_ERR "IPVS: " msg);              \
+    } while (0)        /* same scratch buffer as IP_VS_DBG_BUF, but prints unconditionally at KERN_ERR */
+
+/* Only use from within IP_VS_DBG_BUF() or IP_VS_ERR_BUF(): expands to a call using their local ip_vs_dbg_buf and ip_vs_dbg_idx */
+#define IP_VS_DBG_ADDR(af, addr)                       \
+    ip_vs_dbg_addr(af, ip_vs_dbg_buf,                  \
+                  sizeof(ip_vs_dbg_buf), addr,         \
+                  &ip_vs_dbg_idx)
+
 #define IP_VS_DBG(level, msg...)                       \
     do {                                               \
            if (level <= ip_vs_get_debug_level())       \
@@ -48,6 +140,8 @@ extern int ip_vs_get_debug_level(void);
                pp->debug_packet(pp, skb, ofs, msg);    \
     } while (0)
 #else  /* NO DEBUGGING at ALL */
+#define IP_VS_DBG_BUF(level, msg...)  do {} while (0)
+#define IP_VS_ERR_BUF(msg...)  do {} while (0)
 #define IP_VS_DBG(level, msg...)  do {} while (0)
 #define IP_VS_DBG_RL(msg...)  do {} while (0)
 #define IP_VS_DBG_PKT(level, pp, skb, ofs, msg)                do {} while (0)
@@ -160,27 +254,10 @@ struct ip_vs_estimator {
 
 struct ip_vs_stats
 {
-       __u32                   conns;          /* connections scheduled */
-       __u32                   inpkts;         /* incoming packets */
-       __u32                   outpkts;        /* outgoing packets */
-       __u64                   inbytes;        /* incoming bytes */
-       __u64                   outbytes;       /* outgoing bytes */
-
-       __u32                   cps;            /* current connection rate */
-       __u32                   inpps;          /* current in packet rate */
-       __u32                   outpps;         /* current out packet rate */
-       __u32                   inbps;          /* current in byte rate */
-       __u32                   outbps;         /* current out byte rate */
-
-       /*
-        * Don't add anything before the lock, because we use memcpy() to copy
-        * the members before the lock to struct ip_vs_stats_user in
-        * ip_vs_ctl.c.
-        */
+       struct ip_vs_stats_user ustats;         /* statistics */
+       struct ip_vs_estimator  est;            /* estimator */
 
        spinlock_t              lock;           /* spin lock */
-
-       struct ip_vs_estimator  est;            /* estimator */
 };
 
 struct dst_entry;
@@ -202,21 +279,23 @@ struct ip_vs_protocol {
 
        void (*exit)(struct ip_vs_protocol *pp);
 
-       int (*conn_schedule)(struct sk_buff *skb,
+       int (*conn_schedule)(int af, struct sk_buff *skb,
                             struct ip_vs_protocol *pp,
                             int *verdict, struct ip_vs_conn **cpp);
 
        struct ip_vs_conn *
-       (*conn_in_get)(const struct sk_buff *skb,
+       (*conn_in_get)(int af,
+                      const struct sk_buff *skb,
                       struct ip_vs_protocol *pp,
-                      const struct iphdr *iph,
+                      const struct ip_vs_iphdr *iph,
                       unsigned int proto_off,
                       int inverse);
 
        struct ip_vs_conn *
-       (*conn_out_get)(const struct sk_buff *skb,
+       (*conn_out_get)(int af,
+                       const struct sk_buff *skb,
                        struct ip_vs_protocol *pp,
-                       const struct iphdr *iph,
+                       const struct ip_vs_iphdr *iph,
                        unsigned int proto_off,
                        int inverse);
 
@@ -226,7 +305,8 @@ struct ip_vs_protocol {
        int (*dnat_handler)(struct sk_buff *skb,
                            struct ip_vs_protocol *pp, struct ip_vs_conn *cp);
 
-       int (*csum_check)(struct sk_buff *skb, struct ip_vs_protocol *pp);
+       int (*csum_check)(int af, struct sk_buff *skb,
+                         struct ip_vs_protocol *pp);
 
        const char *(*state_name)(int state);
 
@@ -259,9 +339,10 @@ struct ip_vs_conn {
        struct list_head        c_list;         /* hashed list heads */
 
        /* Protocol, addresses and port numbers */
-       __be32                   caddr;          /* client address */
-       __be32                   vaddr;          /* virtual address */
-       __be32                   daddr;          /* destination address */
+       u16                      af;            /* address family */
+       union nf_inet_addr       caddr;          /* client address */
+       union nf_inet_addr       vaddr;          /* virtual address */
+       union nf_inet_addr       daddr;          /* destination address */
        __be16                   cport;
        __be16                   vport;
        __be16                   dport;
@@ -305,6 +386,45 @@ struct ip_vs_conn {
 
 
 /*
+ *     Extended internal versions of struct ip_vs_service_user and
+ *     ip_vs_dest_user for IPv6 support.
+ *
+ *     We need these to conveniently pass around service and destination
+ *     options, but unfortunately, we also need to keep the old definitions to
+ *     maintain userspace backwards compatibility for the setsockopt interface.
+ */
+struct ip_vs_service_user_kern {
+       /* virtual service addresses */
+       u16                     af;             /* address family */
+       u16                     protocol;
+       union nf_inet_addr      addr;           /* virtual ip address */
+       u16                     port;
+       u32                     fwmark;         /* firewall mark of service */
+
+       /* virtual service options */
+       char                    *sched_name;
+       unsigned                flags;          /* virtual service flags */
+       unsigned                timeout;        /* persistent timeout in sec */
+       u32                     netmask;        /* persistent netmask */
+};
+
+
+struct ip_vs_dest_user_kern {  /* kernel-internal counterpart of struct ip_vs_dest_user; addr is a union nf_inet_addr */
+       /* destination server address */
+       union nf_inet_addr      addr;
+       u16                     port;
+
+       /* real server options */
+       unsigned                conn_flags;     /* connection flags */
+       int                     weight;         /* destination weight */
+
+       /* thresholds for active connections */
+       u32                     u_threshold;    /* upper threshold */
+       u32                     l_threshold;    /* lower threshold */
+};
+
+
+/*
  *     The information about the virtual service offered to the net
  *     and the forwarding entries
  */
@@ -314,8 +434,9 @@ struct ip_vs_service {
        atomic_t                refcnt;   /* reference counter */
        atomic_t                usecnt;   /* use counter */
 
+       u16                     af;       /* address family */
        __u16                   protocol; /* which protocol (TCP/UDP) */
-       __be32                  addr;     /* IP address for virtual service */
+       union nf_inet_addr      addr;     /* IP address for virtual service */
        __be16                  port;     /* port number for the service */
        __u32                   fwmark;   /* firewall mark of the service */
        unsigned                flags;    /* service status flags */
@@ -342,7 +463,8 @@ struct ip_vs_dest {
        struct list_head        n_list;   /* for the dests in the service */
        struct list_head        d_list;   /* for table with all the dests */
 
-       __be32                  addr;           /* IP address of the server */
+       u16                     af;             /* address family */
+       union nf_inet_addr      addr;           /* IP address of the server */
        __be16                  port;           /* port number of the server */
        volatile unsigned       flags;          /* dest status flags */
        atomic_t                conn_flags;     /* flags to copy to conn */
@@ -366,7 +488,7 @@ struct ip_vs_dest {
        /* for virtual service */
        struct ip_vs_service    *svc;           /* service it belongs to */
        __u16                   protocol;       /* which protocol (TCP/UDP) */
-       __be32                  vaddr;          /* virtual IP address */
+       union nf_inet_addr      vaddr;          /* virtual IP address */
        __be16                  vport;          /* virtual port number */
        __u32                   vfwmark;        /* firewall mark of service */
 };
@@ -380,6 +502,9 @@ struct ip_vs_scheduler {
        char                    *name;          /* scheduler name */
        atomic_t                refcnt;         /* reference counter */
        struct module           *module;        /* THIS_MODULE/NULL */
+#ifdef CONFIG_IP_VS_IPV6
+       int                     supports_ipv6;  /* scheduler has IPv6 support */
+#endif
 
        /* scheduler initializing service */
        int (*init_service)(struct ip_vs_service *svc);
@@ -479,16 +604,8 @@ extern void ip_vs_init_hash_table(struct list_head *table, int rows);
 #ifndef CONFIG_IP_VS_TAB_BITS
 #define CONFIG_IP_VS_TAB_BITS   12
 #endif
-/* make sure that IP_VS_CONN_TAB_BITS is located in [8, 20] */
-#if CONFIG_IP_VS_TAB_BITS < 8
-#define IP_VS_CONN_TAB_BITS    8
-#endif
-#if CONFIG_IP_VS_TAB_BITS > 20
-#define IP_VS_CONN_TAB_BITS    20
-#endif
-#if 8 <= CONFIG_IP_VS_TAB_BITS && CONFIG_IP_VS_TAB_BITS <= 20
+
 #define IP_VS_CONN_TAB_BITS    CONFIG_IP_VS_TAB_BITS
-#endif
 #define IP_VS_CONN_TAB_SIZE     (1 << IP_VS_CONN_TAB_BITS)
 #define IP_VS_CONN_TAB_MASK     (IP_VS_CONN_TAB_SIZE - 1)
 
@@ -500,11 +617,16 @@ enum {
 };
 
 extern struct ip_vs_conn *ip_vs_conn_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port);
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port);
+
 extern struct ip_vs_conn *ip_vs_ct_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port);
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port);
+
 extern struct ip_vs_conn *ip_vs_conn_out_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port);
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port);
 
 /* put back the conn without restarting its timer */
 static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
@@ -515,8 +637,9 @@ extern void ip_vs_conn_put(struct ip_vs_conn *cp);
 extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
 
 extern struct ip_vs_conn *
-ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport,
-              __be32 daddr, __be16 dport, unsigned flags,
+ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
+              const union nf_inet_addr *vaddr, __be16 vport,
+              const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
               struct ip_vs_dest *dest);
 extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
 
@@ -532,24 +655,32 @@ static inline void ip_vs_control_del(struct ip_vs_conn *cp)
 {
        struct ip_vs_conn *ctl_cp = cp->control;
        if (!ctl_cp) {
-               IP_VS_ERR("request control DEL for uncontrolled: "
-                         "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n",
-                         NIPQUAD(cp->caddr),ntohs(cp->cport),
-                         NIPQUAD(cp->vaddr),ntohs(cp->vport));
+               IP_VS_ERR_BUF("request control DEL for uncontrolled: "
+                             "%s:%d to %s:%d\n",
+                             IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+                             ntohs(cp->cport),
+                             IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+                             ntohs(cp->vport));
+
                return;
        }
 
-       IP_VS_DBG(7, "DELeting control for: "
-                 "cp.dst=%d.%d.%d.%d:%d ctl_cp.dst=%d.%d.%d.%d:%d\n",
-                 NIPQUAD(cp->caddr),ntohs(cp->cport),
-                 NIPQUAD(ctl_cp->caddr),ntohs(ctl_cp->cport));
+       IP_VS_DBG_BUF(7, "DELeting control for: "
+                     "cp.dst=%s:%d ctl_cp.dst=%s:%d\n",
+                     IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+                     ntohs(cp->cport),
+                     IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr),
+                     ntohs(ctl_cp->cport));
 
        cp->control = NULL;
        if (atomic_read(&ctl_cp->n_control) == 0) {
-               IP_VS_ERR("BUG control DEL with n=0 : "
-                         "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n",
-                         NIPQUAD(cp->caddr),ntohs(cp->cport),
-                         NIPQUAD(cp->vaddr),ntohs(cp->vport));
+               IP_VS_ERR_BUF("BUG control DEL with n=0 : "
+                             "%s:%d to %s:%d\n",
+                             IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+                             ntohs(cp->cport),
+                             IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+                             ntohs(cp->vport));
+
                return;
        }
        atomic_dec(&ctl_cp->n_control);
@@ -559,17 +690,22 @@ static inline void
 ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
 {
        if (cp->control) {
-               IP_VS_ERR("request control ADD for already controlled: "
-                         "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n",
-                         NIPQUAD(cp->caddr),ntohs(cp->cport),
-                         NIPQUAD(cp->vaddr),ntohs(cp->vport));
+               IP_VS_ERR_BUF("request control ADD for already controlled: "
+                             "%s:%d to %s:%d\n",
+                             IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+                             ntohs(cp->cport),
+                             IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+                             ntohs(cp->vport));
+
                ip_vs_control_del(cp);
        }
 
-       IP_VS_DBG(7, "ADDing control for: "
-                 "cp.dst=%d.%d.%d.%d:%d ctl_cp.dst=%d.%d.%d.%d:%d\n",
-                 NIPQUAD(cp->caddr),ntohs(cp->cport),
-                 NIPQUAD(ctl_cp->caddr),ntohs(ctl_cp->cport));
+       IP_VS_DBG_BUF(7, "ADDing control for: "
+                     "cp.dst=%s:%d ctl_cp.dst=%s:%d\n",
+                     IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+                     ntohs(cp->cport),
+                     IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr),
+                     ntohs(ctl_cp->cport));
 
        cp->control = ctl_cp;
        atomic_inc(&ctl_cp->n_control);
@@ -647,7 +783,8 @@ extern struct ip_vs_stats ip_vs_stats;
 extern const struct ctl_path net_vs_ctl_path[];
 
 extern struct ip_vs_service *
-ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport);
+ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+                 const union nf_inet_addr *vaddr, __be16 vport);
 
 static inline void ip_vs_service_put(struct ip_vs_service *svc)
 {
@@ -655,14 +792,16 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc)
 }
 
 extern struct ip_vs_dest *
-ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport);
+ip_vs_lookup_real_service(int af, __u16 protocol,
+                         const union nf_inet_addr *daddr, __be16 dport);
+
 extern int ip_vs_use_count_inc(void);
 extern void ip_vs_use_count_dec(void);
 extern int ip_vs_control_init(void);
 extern void ip_vs_control_cleanup(void);
 extern struct ip_vs_dest *
-ip_vs_find_dest(__be32 daddr, __be16 dport,
-                __be32 vaddr, __be16 vport, __u16 protocol);
+ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport,
+               const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol);
 extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
 
 
@@ -706,6 +845,19 @@ extern int ip_vs_icmp_xmit
 (struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset);
 extern void ip_vs_dst_reset(struct ip_vs_dest *dest);
 
+#ifdef CONFIG_IP_VS_IPV6
+extern int ip_vs_bypass_xmit_v6
+(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
+extern int ip_vs_nat_xmit_v6
+(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
+extern int ip_vs_tunnel_xmit_v6
+(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
+extern int ip_vs_dr_xmit_v6
+(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
+extern int ip_vs_icmp_xmit_v6
+(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp,
+ int offset);
+#endif
 
 /*
  *     This is a simple mechanism to ignore packets when
@@ -750,7 +902,12 @@ static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp)
 }
 
 extern void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
-               struct ip_vs_conn *cp, int dir);
+                          struct ip_vs_conn *cp, int dir);
+
+#ifdef CONFIG_IP_VS_IPV6
+extern void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
+                             struct ip_vs_conn *cp, int dir);
+#endif
 
 extern __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset);
 
@@ -761,6 +918,17 @@ static inline __wsum ip_vs_check_diff4(__be32 old, __be32 new, __wsum oldsum)
        return csum_partial((char *) diff, sizeof(diff), oldsum);
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+static inline __wsum ip_vs_check_diff16(const __be32 *old, const __be32 *new,
+                                       __wsum oldsum)
+{      /* fold a change of four __be32 words (old -> new) into oldsum; old and new must each provide 4 words */
+       __be32 diff[8] = { ~old[3], ~old[2], ~old[1], ~old[0],
+                           new[3],  new[2],  new[1],  new[0] };        /* complemented old words, then the new words */
+
+       return csum_partial((char *) diff, sizeof(diff), oldsum);
+}
+#endif
+
 static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum)
 {
        __be16 diff[2] = { ~old, new };
index 2e48a7e..de6004d 100644 (file)
@@ -24,6 +24,14 @@ menuconfig IP_VS
 
 if IP_VS
 
+config IP_VS_IPV6
+       bool "IPv6 support for IPVS (DANGEROUS)"
+       depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6)
+       ---help---
+         Add IPv6 support to IPVS. This is incomplete and might be dangerous.
+
+         Say N if unsure.
+
 config IP_VS_DEBUG
        bool "IP virtual server debugging"
        ---help---
@@ -33,7 +41,8 @@ config        IP_VS_DEBUG
 
 config IP_VS_TAB_BITS
        int "IPVS connection table size (the Nth power of 2)"
-       default "12" 
+       range 8 20
+       default 12
        ---help---
          The IPVS connection hash table uses the chaining scheme to handle
          hash collisions. Using a big IPVS connection hash table will greatly
index 44a6872..9a24332 100644 (file)
@@ -114,9 +114,18 @@ static inline void ct_write_unlock_bh(unsigned key)
 /*
  *     Returns hash value for IPVS connection entry
  */
-static unsigned int ip_vs_conn_hashkey(unsigned proto, __be32 addr, __be16 port)
+static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
+                                      const union nf_inet_addr *addr,
+                                      __be16 port)
 {
-       return jhash_3words((__force u32)addr, (__force u32)port, proto, ip_vs_conn_rnd)
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6)
+               return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
+                                   (__force u32)port, proto, ip_vs_conn_rnd)
+                       & IP_VS_CONN_TAB_MASK;
+#endif
+       return jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
+                           ip_vs_conn_rnd)
                & IP_VS_CONN_TAB_MASK;
 }
 
@@ -131,7 +140,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
        int ret;
 
        /* Hash by protocol, client address and port */
-       hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport);
+       hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
 
        ct_write_lock(hash);
 
@@ -162,7 +171,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
        int ret;
 
        /* unhash it and decrease its reference counter */
-       hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport);
+       hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
 
        ct_write_lock(hash);
 
@@ -187,20 +196,23 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
  *     d_addr, d_port: pkt dest address (load balancer)
  */
 static inline struct ip_vs_conn *__ip_vs_conn_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
 {
        unsigned hash;
        struct ip_vs_conn *cp;
 
-       hash = ip_vs_conn_hashkey(protocol, s_addr, s_port);
+       hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port);
 
        ct_read_lock(hash);
 
        list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
-               if (s_addr==cp->caddr && s_port==cp->cport &&
-                   d_port==cp->vport && d_addr==cp->vaddr &&
+               if (cp->af == af &&
+                   ip_vs_addr_equal(af, s_addr, &cp->caddr) &&
+                   ip_vs_addr_equal(af, d_addr, &cp->vaddr) &&
+                   s_port == cp->cport && d_port == cp->vport &&
                    ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
-                   protocol==cp->protocol) {
+                   protocol == cp->protocol) {
                        /* HIT */
                        atomic_inc(&cp->refcnt);
                        ct_read_unlock(hash);
@@ -214,39 +226,44 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get
 }
 
 struct ip_vs_conn *ip_vs_conn_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
 {
        struct ip_vs_conn *cp;
 
-       cp = __ip_vs_conn_in_get(protocol, s_addr, s_port, d_addr, d_port);
+       cp = __ip_vs_conn_in_get(af, protocol, s_addr, s_port, d_addr, d_port);
        if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt))
-               cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port);
+               cp = __ip_vs_conn_in_get(af, protocol, s_addr, 0, d_addr,
+                                        d_port);
 
-       IP_VS_DBG(9, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
-                 ip_vs_proto_name(protocol),
-                 NIPQUAD(s_addr), ntohs(s_port),
-                 NIPQUAD(d_addr), ntohs(d_port),
-                 cp?"hit":"not hit");
+       IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n",
+                     ip_vs_proto_name(protocol),
+                     IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+                     IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+                     cp ? "hit" : "not hit");
 
        return cp;
 }
 
 /* Get reference to connection template */
 struct ip_vs_conn *ip_vs_ct_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
 {
        unsigned hash;
        struct ip_vs_conn *cp;
 
-       hash = ip_vs_conn_hashkey(protocol, s_addr, s_port);
+       hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port);
 
        ct_read_lock(hash);
 
        list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
-               if (s_addr==cp->caddr && s_port==cp->cport &&
-                   d_port==cp->vport && d_addr==cp->vaddr &&
+               if (cp->af == af &&
+                   ip_vs_addr_equal(af, s_addr, &cp->caddr) &&
+                   ip_vs_addr_equal(af, d_addr, &cp->vaddr) &&
+                   s_port == cp->cport && d_port == cp->vport &&
                    cp->flags & IP_VS_CONN_F_TEMPLATE &&
-                   protocol==cp->protocol) {
+                   protocol == cp->protocol) {
                        /* HIT */
                        atomic_inc(&cp->refcnt);
                        goto out;
@@ -257,11 +274,11 @@ struct ip_vs_conn *ip_vs_ct_in_get
   out:
        ct_read_unlock(hash);
 
-       IP_VS_DBG(9, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
-                 ip_vs_proto_name(protocol),
-                 NIPQUAD(s_addr), ntohs(s_port),
-                 NIPQUAD(d_addr), ntohs(d_port),
-                 cp?"hit":"not hit");
+       IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n",
+                     ip_vs_proto_name(protocol),
+                     IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+                     IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+                     cp ? "hit" : "not hit");
 
        return cp;
 }
@@ -273,7 +290,8 @@ struct ip_vs_conn *ip_vs_ct_in_get
  *     d_addr, d_port: pkt dest address (foreign host)
  */
 struct ip_vs_conn *ip_vs_conn_out_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
 {
        unsigned hash;
        struct ip_vs_conn *cp, *ret=NULL;
@@ -281,13 +299,15 @@ struct ip_vs_conn *ip_vs_conn_out_get
        /*
         *      Check for "full" addressed entries
         */
-       hash = ip_vs_conn_hashkey(protocol, d_addr, d_port);
+       hash = ip_vs_conn_hashkey(af, protocol, d_addr, d_port);
 
        ct_read_lock(hash);
 
        list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
-               if (d_addr == cp->caddr && d_port == cp->cport &&
-                   s_port == cp->dport && s_addr == cp->daddr &&
+               if (cp->af == af &&
+                   ip_vs_addr_equal(af, d_addr, &cp->caddr) &&
+                   ip_vs_addr_equal(af, s_addr, &cp->daddr) &&
+                   d_port == cp->cport && s_port == cp->dport &&
                    protocol == cp->protocol) {
                        /* HIT */
                        atomic_inc(&cp->refcnt);
@@ -298,11 +318,11 @@ struct ip_vs_conn *ip_vs_conn_out_get
 
        ct_read_unlock(hash);
 
-       IP_VS_DBG(9, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
-                 ip_vs_proto_name(protocol),
-                 NIPQUAD(s_addr), ntohs(s_port),
-                 NIPQUAD(d_addr), ntohs(d_port),
-                 ret?"hit":"not hit");
+       IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n",
+                     ip_vs_proto_name(protocol),
+                     IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+                     IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+                     ret ? "hit" : "not hit");
 
        return ret;
 }
@@ -369,6 +389,33 @@ static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
        }
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+static inline void ip_vs_bind_xmit_v6(struct ip_vs_conn *cp)
+{      /* choose cp->packet_xmit from the connection's forwarding method */
+       switch (IP_VS_FWD_METHOD(cp)) { /* no default case: other values leave cp->packet_xmit unchanged */
+       case IP_VS_CONN_F_MASQ:
+               cp->packet_xmit = ip_vs_nat_xmit_v6;
+               break;
+
+       case IP_VS_CONN_F_TUNNEL:
+               cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+               break;
+
+       case IP_VS_CONN_F_DROUTE:
+               cp->packet_xmit = ip_vs_dr_xmit_v6;
+               break;
+
+       case IP_VS_CONN_F_LOCALNODE:
+               cp->packet_xmit = ip_vs_null_xmit;      /* the only case here without a _v6-specific handler */
+               break;
+
+       case IP_VS_CONN_F_BYPASS:
+               cp->packet_xmit = ip_vs_bypass_xmit_v6;
+               break;
+       }
+}
+#endif
+
 
 static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest)
 {
@@ -402,16 +449,16 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
                cp->flags |= atomic_read(&dest->conn_flags);
        cp->dest = dest;
 
-       IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
-                 "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
-                 "dest->refcnt:%d\n",
-                 ip_vs_proto_name(cp->protocol),
-                 NIPQUAD(cp->caddr), ntohs(cp->cport),
-                 NIPQUAD(cp->vaddr), ntohs(cp->vport),
-                 NIPQUAD(cp->daddr), ntohs(cp->dport),
-                 ip_vs_fwd_tag(cp), cp->state,
-                 cp->flags, atomic_read(&cp->refcnt),
-                 atomic_read(&dest->refcnt));
+       IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d "
+                     "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
+                     "dest->refcnt:%d\n",
+                     ip_vs_proto_name(cp->protocol),
+                     IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+                     IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+                     IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+                     ip_vs_fwd_tag(cp), cp->state,
+                     cp->flags, atomic_read(&cp->refcnt),
+                     atomic_read(&dest->refcnt));
 
        /* Update the connection counters */
        if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
@@ -444,8 +491,9 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
        struct ip_vs_dest *dest;
 
        if ((cp) && (!cp->dest)) {
-               dest = ip_vs_find_dest(cp->daddr, cp->dport,
-                                      cp->vaddr, cp->vport, cp->protocol);
+               dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,
+                                      &cp->vaddr, cp->vport,
+                                      cp->protocol);
                ip_vs_bind_dest(cp, dest);
                return dest;
        } else
@@ -464,16 +512,16 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
        if (!dest)
                return;
 
-       IP_VS_DBG(7, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
-                 "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
-                 "dest->refcnt:%d\n",
-                 ip_vs_proto_name(cp->protocol),
-                 NIPQUAD(cp->caddr), ntohs(cp->cport),
-                 NIPQUAD(cp->vaddr), ntohs(cp->vport),
-                 NIPQUAD(cp->daddr), ntohs(cp->dport),
-                 ip_vs_fwd_tag(cp), cp->state,
-                 cp->flags, atomic_read(&cp->refcnt),
-                 atomic_read(&dest->refcnt));
+       IP_VS_DBG_BUF(7, "Unbind-dest %s c:%s:%d v:%s:%d "
+                     "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
+                     "dest->refcnt:%d\n",
+                     ip_vs_proto_name(cp->protocol),
+                     IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+                     IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+                     IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+                     ip_vs_fwd_tag(cp), cp->state,
+                     cp->flags, atomic_read(&cp->refcnt),
+                     atomic_read(&dest->refcnt));
 
        /* Update the connection counters */
        if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
@@ -526,13 +574,16 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
            !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
            (sysctl_ip_vs_expire_quiescent_template &&
             (atomic_read(&dest->weight) == 0))) {
-               IP_VS_DBG(9, "check_template: dest not available for "
-                         "protocol %s s:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
-                         "-> d:%u.%u.%u.%u:%d\n",
-                         ip_vs_proto_name(ct->protocol),
-                         NIPQUAD(ct->caddr), ntohs(ct->cport),
-                         NIPQUAD(ct->vaddr), ntohs(ct->vport),
-                         NIPQUAD(ct->daddr), ntohs(ct->dport));
+               IP_VS_DBG_BUF(9, "check_template: dest not available for "
+                             "protocol %s s:%s:%d v:%s:%d "
+                             "-> d:%s:%d\n",
+                             ip_vs_proto_name(ct->protocol),
+                             IP_VS_DBG_ADDR(ct->af, &ct->caddr),
+                             ntohs(ct->cport),
+                             IP_VS_DBG_ADDR(ct->af, &ct->vaddr),
+                             ntohs(ct->vport),
+                             IP_VS_DBG_ADDR(ct->af, &ct->daddr),
+                             ntohs(ct->dport));
 
                /*
                 * Invalidate the connection template
@@ -625,8 +676,9 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
  *     Create a new connection entry and hash it into the ip_vs_conn_tab
  */
 struct ip_vs_conn *
-ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport,
-              __be32 daddr, __be16 dport, unsigned flags,
+ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
+              const union nf_inet_addr *vaddr, __be16 vport,
+              const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
               struct ip_vs_dest *dest)
 {
        struct ip_vs_conn *cp;
@@ -640,12 +692,13 @@ ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport
 
        INIT_LIST_HEAD(&cp->c_list);
        setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
+       cp->af             = af;
        cp->protocol       = proto;
-       cp->caddr          = caddr;
+       ip_vs_addr_copy(af, &cp->caddr, caddr);
        cp->cport          = cport;
-       cp->vaddr          = vaddr;
+       ip_vs_addr_copy(af, &cp->vaddr, vaddr);
        cp->vport          = vport;
-       cp->daddr          = daddr;
+       ip_vs_addr_copy(af, &cp->daddr, daddr);
        cp->dport          = dport;
        cp->flags          = flags;
        spin_lock_init(&cp->lock);
@@ -672,7 +725,12 @@ ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport
        cp->timeout = 3*HZ;
 
        /* Bind its packet transmitter */
-       ip_vs_bind_xmit(cp);
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6)
+               ip_vs_bind_xmit_v6(cp);
+       else
+#endif
+               ip_vs_bind_xmit(cp);
 
        if (unlikely(pp && atomic_read(&pp->appcnt)))
                ip_vs_bind_app(cp, pp);
@@ -760,12 +818,26 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
        else {
                const struct ip_vs_conn *cp = v;
 
-               seq_printf(seq,
-                       "%-3s %08X %04X %08X %04X %08X %04X %-11s %7lu\n",
+#ifdef CONFIG_IP_VS_IPV6
+               if (cp->af == AF_INET6)
+                       seq_printf(seq,
+                               "%-3s " NIP6_FMT " %04X " NIP6_FMT
+                               " %04X " NIP6_FMT " %04X %-11s %7lu\n",
+                               ip_vs_proto_name(cp->protocol),
+                               NIP6(cp->caddr.in6), ntohs(cp->cport),
+                               NIP6(cp->vaddr.in6), ntohs(cp->vport),
+                               NIP6(cp->daddr.in6), ntohs(cp->dport),
+                               ip_vs_state_name(cp->protocol, cp->state),
+                               (cp->timer.expires-jiffies)/HZ);
+               else
+#endif
+                       seq_printf(seq,
+                               "%-3s %08X %04X %08X %04X"
+                               " %08X %04X %-11s %7lu\n",
                                ip_vs_proto_name(cp->protocol),
-                               ntohl(cp->caddr), ntohs(cp->cport),
-                               ntohl(cp->vaddr), ntohs(cp->vport),
-                               ntohl(cp->daddr), ntohs(cp->dport),
+                               ntohl(cp->caddr.ip), ntohs(cp->cport),
+                               ntohl(cp->vaddr.ip), ntohs(cp->vport),
+                               ntohl(cp->daddr.ip), ntohs(cp->dport),
                                ip_vs_state_name(cp->protocol, cp->state),
                                (cp->timer.expires-jiffies)/HZ);
        }
@@ -809,12 +881,27 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
        else {
                const struct ip_vs_conn *cp = v;
 
-               seq_printf(seq,
-                       "%-3s %08X %04X %08X %04X %08X %04X %-11s %-6s %7lu\n",
+#ifdef CONFIG_IP_VS_IPV6
+               if (cp->af == AF_INET6)
+                       seq_printf(seq,
+                               "%-3s " NIP6_FMT " %04X " NIP6_FMT
+                               " %04X " NIP6_FMT " %04X %-11s %-6s %7lu\n",
+                               ip_vs_proto_name(cp->protocol),
+                               NIP6(cp->caddr.in6), ntohs(cp->cport),
+                               NIP6(cp->vaddr.in6), ntohs(cp->vport),
+                               NIP6(cp->daddr.in6), ntohs(cp->dport),
+                               ip_vs_state_name(cp->protocol, cp->state),
+                               ip_vs_origin_name(cp->flags),
+                               (cp->timer.expires-jiffies)/HZ);
+               else
+#endif
+                       seq_printf(seq,
+                               "%-3s %08X %04X %08X %04X "
+                               "%08X %04X %-11s %-6s %7lu\n",
                                ip_vs_proto_name(cp->protocol),
-                               ntohl(cp->caddr), ntohs(cp->cport),
-                               ntohl(cp->vaddr), ntohs(cp->vport),
-                               ntohl(cp->daddr), ntohs(cp->dport),
+                               ntohl(cp->caddr.ip), ntohs(cp->cport),
+                               ntohl(cp->vaddr.ip), ntohs(cp->vport),
+                               ntohl(cp->daddr.ip), ntohs(cp->dport),
                                ip_vs_state_name(cp->protocol, cp->state),
                                ip_vs_origin_name(cp->flags),
                                (cp->timer.expires-jiffies)/HZ);
index 9fbf0a6..80a4fcf 100644 (file)
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 
+#ifdef CONFIG_IP_VS_IPV6
+#include <net/ipv6.h>
+#include <linux/netfilter_ipv6.h>
+#endif
+
 #include <net/ip_vs.h>
 
 
@@ -60,6 +65,7 @@ EXPORT_SYMBOL(ip_vs_get_debug_level);
 
 /* ID used in ICMP lookups */
 #define icmp_id(icmph)          (((icmph)->un).echo.id)
+/* ICMPv6 counterpart of icmp_id(): identifier of the echo header.
+ * The argument is parenthesized so that any pointer expression
+ * (e.g. a cast or pointer arithmetic) expands correctly. */
+#define icmpv6_id(icmph)        (((icmph)->icmp6_dataun).u_echo.identifier)
 
 const char *ip_vs_proto_name(unsigned proto)
 {
@@ -74,6 +80,10 @@ const char *ip_vs_proto_name(unsigned proto)
                return "TCP";
        case IPPROTO_ICMP:
                return "ICMP";
+#ifdef CONFIG_IP_VS_IPV6
+       case IPPROTO_ICMPV6:
+               return "ICMPv6";
+#endif
        default:
                sprintf(buf, "IP_%d", proto);
                return buf;
@@ -92,18 +102,18 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
        struct ip_vs_dest *dest = cp->dest;
        if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
                spin_lock(&dest->stats.lock);
-               dest->stats.inpkts++;
-               dest->stats.inbytes += skb->len;
+               dest->stats.ustats.inpkts++;
+               dest->stats.ustats.inbytes += skb->len;
                spin_unlock(&dest->stats.lock);
 
                spin_lock(&dest->svc->stats.lock);
-               dest->svc->stats.inpkts++;
-               dest->svc->stats.inbytes += skb->len;
+               dest->svc->stats.ustats.inpkts++;
+               dest->svc->stats.ustats.inbytes += skb->len;
                spin_unlock(&dest->svc->stats.lock);
 
                spin_lock(&ip_vs_stats.lock);
-               ip_vs_stats.inpkts++;
-               ip_vs_stats.inbytes += skb->len;
+               ip_vs_stats.ustats.inpkts++;
+               ip_vs_stats.ustats.inbytes += skb->len;
                spin_unlock(&ip_vs_stats.lock);
        }
 }
@@ -115,18 +125,18 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
        struct ip_vs_dest *dest = cp->dest;
        if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
                spin_lock(&dest->stats.lock);
-               dest->stats.outpkts++;
-               dest->stats.outbytes += skb->len;
+               dest->stats.ustats.outpkts++;
+               dest->stats.ustats.outbytes += skb->len;
                spin_unlock(&dest->stats.lock);
 
                spin_lock(&dest->svc->stats.lock);
-               dest->svc->stats.outpkts++;
-               dest->svc->stats.outbytes += skb->len;
+               dest->svc->stats.ustats.outpkts++;
+               dest->svc->stats.ustats.outbytes += skb->len;
                spin_unlock(&dest->svc->stats.lock);
 
                spin_lock(&ip_vs_stats.lock);
-               ip_vs_stats.outpkts++;
-               ip_vs_stats.outbytes += skb->len;
+               ip_vs_stats.ustats.outpkts++;
+               ip_vs_stats.ustats.outbytes += skb->len;
                spin_unlock(&ip_vs_stats.lock);
        }
 }
@@ -136,15 +146,15 @@ static inline void
 ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
 {
        spin_lock(&cp->dest->stats.lock);
-       cp->dest->stats.conns++;
+       cp->dest->stats.ustats.conns++;
        spin_unlock(&cp->dest->stats.lock);
 
        spin_lock(&svc->stats.lock);
-       svc->stats.conns++;
+       svc->stats.ustats.conns++;
        spin_unlock(&svc->stats.lock);
 
        spin_lock(&ip_vs_stats.lock);
-       ip_vs_stats.conns++;
+       ip_vs_stats.ustats.conns++;
        spin_unlock(&ip_vs_stats.lock);
 }
 
@@ -173,20 +183,28 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                    __be16 ports[2])
 {
        struct ip_vs_conn *cp = NULL;
-       struct iphdr *iph = ip_hdr(skb);
+       struct ip_vs_iphdr iph;
        struct ip_vs_dest *dest;
        struct ip_vs_conn *ct;
-       __be16  dport;   /* destination port to forward */
-       __be32  snet;    /* source network of the client, after masking */
+       __be16  dport;                  /* destination port to forward */
+       union nf_inet_addr snet;        /* source network of the client,
+                                          after masking */
+
+       ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
 
        /* Mask saddr with the netmask to adjust template granularity */
-       snet = iph->saddr & svc->netmask;
+#ifdef CONFIG_IP_VS_IPV6
+       if (svc->af == AF_INET6)
+               ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->netmask);
+       else
+#endif
+               snet.ip = iph.saddr.ip & svc->netmask;
 
-       IP_VS_DBG(6, "p-schedule: src %u.%u.%u.%u:%u dest %u.%u.%u.%u:%u "
-                 "mnet %u.%u.%u.%u\n",
-                 NIPQUAD(iph->saddr), ntohs(ports[0]),
-                 NIPQUAD(iph->daddr), ntohs(ports[1]),
-                 NIPQUAD(snet));
+       IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
+                     "mnet %s\n",
+                     IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]),
+                     IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]),
+                     IP_VS_DBG_ADDR(svc->af, &snet));
 
        /*
         * As far as we know, FTP is a very complicated network protocol, and
@@ -204,11 +222,11 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
        if (ports[1] == svc->port) {
                /* Check if a template already exists */
                if (svc->port != FTPPORT)
-                       ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
-                                              iph->daddr, ports[1]);
+                       ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
+                                            &iph.daddr, ports[1]);
                else
-                       ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
-                                              iph->daddr, 0);
+                       ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
+                                            &iph.daddr, 0);
 
                if (!ct || !ip_vs_check_template(ct)) {
                        /*
@@ -228,18 +246,18 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                         * for ftp service.
                         */
                        if (svc->port != FTPPORT)
-                               ct = ip_vs_conn_new(iph->protocol,
-                                                   snet, 0,
-                                                   iph->daddr,
+                               ct = ip_vs_conn_new(svc->af, iph.protocol,
+                                                   &snet, 0,
+                                                   &iph.daddr,
                                                    ports[1],
-                                                   dest->addr, dest->port,
+                                                   &dest->addr, dest->port,
                                                    IP_VS_CONN_F_TEMPLATE,
                                                    dest);
                        else
-                               ct = ip_vs_conn_new(iph->protocol,
-                                                   snet, 0,
-                                                   iph->daddr, 0,
-                                                   dest->addr, 0,
+                               ct = ip_vs_conn_new(svc->af, iph.protocol,
+                                                   &snet, 0,
+                                                   &iph.daddr, 0,
+                                                   &dest->addr, 0,
                                                    IP_VS_CONN_F_TEMPLATE,
                                                    dest);
                        if (ct == NULL)
@@ -258,12 +276,16 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                 * fwmark template: <IPPROTO_IP,caddr,0,fwmark,0,daddr,0>
                 * port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
                 */
-               if (svc->fwmark)
-                       ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0,
-                                              htonl(svc->fwmark), 0);
-               else
-                       ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
-                                              iph->daddr, 0);
+               if (svc->fwmark) {
+                       union nf_inet_addr fwmark = {
+                               .all = { 0, 0, 0, htonl(svc->fwmark) }
+                       };
+
+                       ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0,
+                                            &fwmark, 0);
+               } else
+                       ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
+                                            &iph.daddr, 0);
 
                if (!ct || !ip_vs_check_template(ct)) {
                        /*
@@ -282,18 +304,22 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                        /*
                         * Create a template according to the service
                         */
-                       if (svc->fwmark)
-                               ct = ip_vs_conn_new(IPPROTO_IP,
-                                                   snet, 0,
-                                                   htonl(svc->fwmark), 0,
-                                                   dest->addr, 0,
+                       if (svc->fwmark) {
+                               /* fwmark template: the mark is stored in
+                                * place of the virtual address */
+                               union nf_inet_addr fwmark = {
+                                       .all = { 0, 0, 0, htonl(svc->fwmark) }
+                               };
+
+                               ct = ip_vs_conn_new(svc->af, IPPROTO_IP,
+                                                   &snet, 0,
+                                                   &fwmark, 0,
+                                                   &dest->addr, 0,
                                                    IP_VS_CONN_F_TEMPLATE,
                                                    dest);
-                       else
-                               ct = ip_vs_conn_new(iph->protocol,
-                                                   snet, 0,
-                                                   iph->daddr, 0,
-                                                   dest->addr, 0,
+                       } else
+                               ct = ip_vs_conn_new(svc->af, iph.protocol,
+                                                   &snet, 0,
+                                                   &iph.daddr, 0,
+                                                   &dest->addr, 0,
                                                    IP_VS_CONN_F_TEMPLATE,
                                                    dest);
                        if (ct == NULL)
@@ -310,10 +336,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
        /*
         *    Create a new connection according to the template
         */
-       cp = ip_vs_conn_new(iph->protocol,
-                           iph->saddr, ports[0],
-                           iph->daddr, ports[1],
-                           dest->addr, dport,
+       cp = ip_vs_conn_new(svc->af, iph.protocol,
+                           &iph.saddr, ports[0],
+                           &iph.daddr, ports[1],
+                           &dest->addr, dport,
                            0,
                            dest);
        if (cp == NULL) {
@@ -342,12 +368,12 @@ struct ip_vs_conn *
 ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 {
        struct ip_vs_conn *cp = NULL;
-       struct iphdr *iph = ip_hdr(skb);
+       struct ip_vs_iphdr iph;
        struct ip_vs_dest *dest;
        __be16 _ports[2], *pptr;
 
-       pptr = skb_header_pointer(skb, iph->ihl*4,
-                                 sizeof(_ports), _ports);
+       ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
+       pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
        if (pptr == NULL)
                return NULL;
 
@@ -377,22 +403,22 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
        /*
         *    Create a connection entry.
         */
-       cp = ip_vs_conn_new(iph->protocol,
-                           iph->saddr, pptr[0],
-                           iph->daddr, pptr[1],
-                           dest->addr, dest->port?dest->port:pptr[1],
+       cp = ip_vs_conn_new(svc->af, iph.protocol,
+                           &iph.saddr, pptr[0],
+                           &iph.daddr, pptr[1],
+                           &dest->addr, dest->port ? dest->port : pptr[1],
                            0,
                            dest);
        if (cp == NULL)
                return NULL;
 
-       IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u "
-                 "d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n",
-                 ip_vs_fwd_tag(cp),
-                 NIPQUAD(cp->caddr), ntohs(cp->cport),
-                 NIPQUAD(cp->vaddr), ntohs(cp->vport),
-                 NIPQUAD(cp->daddr), ntohs(cp->dport),
-                 cp->flags, atomic_read(&cp->refcnt));
+       IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
+                     "d:%s:%u conn->flags:%X conn->refcnt:%d\n",
+                     ip_vs_fwd_tag(cp),
+                     IP_VS_DBG_ADDR(svc->af, &cp->caddr), ntohs(cp->cport),
+                     IP_VS_DBG_ADDR(svc->af, &cp->vaddr), ntohs(cp->vport),
+                     IP_VS_DBG_ADDR(svc->af, &cp->daddr), ntohs(cp->dport),
+                     cp->flags, atomic_read(&cp->refcnt));
 
        ip_vs_conn_stats(cp, svc);
        return cp;
@@ -408,20 +434,27 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
                struct ip_vs_protocol *pp)
 {
        __be16 _ports[2], *pptr;
-       struct iphdr *iph = ip_hdr(skb);
+       struct ip_vs_iphdr iph;
+       int unicast;
+       ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
 
-       pptr = skb_header_pointer(skb, iph->ihl*4,
-                                 sizeof(_ports), _ports);
+       pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
        if (pptr == NULL) {
                ip_vs_service_put(svc);
                return NF_DROP;
        }
 
+#ifdef CONFIG_IP_VS_IPV6
+       if (svc->af == AF_INET6)
+               unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
+       else
+#endif
+               unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST);
+
        /* if it is fwmark-based service, the cache_bypass sysctl is up
-          and the destination is RTN_UNICAST (and not local), then create
+          and the destination is a non-local unicast, then create
           a cache_bypass connection entry */
-       if (sysctl_ip_vs_cache_bypass && svc->fwmark
-           && (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) {
+       if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
                int ret, cs;
                struct ip_vs_conn *cp;
+               /* A bypass entry has no real server. ip_vs_conn_new() takes
+                * its addresses by pointer and copies from them, so hand it
+                * an all-zero address instead of a NULL pointer. */
+               union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } };
 
@@ -429,9 +462,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 
                /* create a new connection entry */
                IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n");
-               cp = ip_vs_conn_new(iph->protocol,
-                                   iph->saddr, pptr[0],
-                                   iph->daddr, pptr[1],
+               cp = ip_vs_conn_new(svc->af, iph.protocol,
+                                   &iph.saddr, pptr[0],
+                                   &iph.daddr, pptr[1],
-                                   0, 0,
+                                   &daddr, 0,
                                    IP_VS_CONN_F_BYPASS,
                                    NULL);
@@ -473,7 +506,14 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
         * created, the TCP RST packet cannot be sent, instead that
         * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ
         */
-       icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+#ifdef CONFIG_IP_VS_IPV6
+       if (svc->af == AF_INET6)
+               icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0,
+                           skb->dev);
+       else
+#endif
+               icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
        return NF_DROP;
 }
 
@@ -512,6 +552,14 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
        return err;
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * IPv6 placeholder next to ip_vs_gather_frags(): no reassembly is
+ * attempted, both arguments are ignored and 0 is always returned.
+ */
+static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
+{
+       /* TODO IPv6: Find out what to do here for IPv6 */
+       return 0;
+}
+#endif
+
 /*
  * Packet has been made sufficiently writable in caller
  * - inout: 1=in->out, 0=out->in
@@ -526,14 +574,14 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
        struct iphdr *ciph       = (struct iphdr *)(icmph + 1);
 
        if (inout) {
-               iph->saddr = cp->vaddr;
+               iph->saddr = cp->vaddr.ip;
                ip_send_check(iph);
-               ciph->daddr = cp->vaddr;
+               ciph->daddr = cp->vaddr.ip;
                ip_send_check(ciph);
        } else {
-               iph->daddr = cp->daddr;
+               iph->daddr = cp->daddr.ip;
                ip_send_check(iph);
-               ciph->saddr = cp->daddr;
+               ciph->saddr = cp->daddr.ip;
                ip_send_check(ciph);
        }
 
@@ -560,21 +608,112 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
                        "Forwarding altered incoming ICMP");
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * IPv6 counterpart of ip_vs_nat_icmp(): rewrite the outer IPv6 header
+ * and the IPv6 header embedded in the ICMPv6 payload.
+ * - inout != 0: outer source and embedded destination become cp->vaddr,
+ *   embedded destination port becomes cp->vport
+ * - inout == 0: outer destination and embedded source become cp->daddr,
+ *   embedded source port becomes cp->dport
+ * Ports are only touched when the embedded next header is TCP or UDP.
+ */
+void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
+                   struct ip_vs_conn *cp, int inout)
+{
+       const unsigned int icmp_offset = sizeof(struct ipv6hdr);
+       struct ipv6hdr *outer = ipv6_hdr(skb);
+       struct icmp6hdr *icmph;
+       struct ipv6hdr *inner;
+
+       /* the ICMPv6 header is taken to sit right behind the fixed header */
+       icmph = (struct icmp6hdr *)(skb_network_header(skb) + icmp_offset);
+       inner = (struct ipv6hdr *)(icmph + 1);
+
+       if (inout) {
+               outer->saddr = cp->vaddr.in6;
+               inner->daddr = cp->vaddr.in6;
+       } else {
+               outer->daddr = cp->daddr.in6;
+               inner->saddr = cp->daddr.in6;
+       }
+
+       switch (inner->nexthdr) {
+       case IPPROTO_TCP:
+       case IPPROTO_UDP: {
+               /* the TCP/UDP ports follow the embedded IPv6 header */
+               __be16 *ports = (__be16 *)(inner + 1);
+
+               if (inout)
+                       ports[1] = cp->vport;
+               else
+                       ports[0] = cp->dport;
+               break;
+       }
+       default:
+               break;
+       }
+
+       /* And finally the ICMP checksum */
+       icmph->icmp6_cksum = 0;
+       /* TODO IPv6: is this correct for ICMPv6? */
+       /* NOTE(review): the return value is discarded here, so this
+        * function never stores a new value in icmp6_cksum - confirm */
+       ip_vs_checksum_complete(skb, icmp_offset);
+       skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+       if (inout)
+               IP_VS_DBG_PKT(11, pp, skb, (void *)inner - (void *)outer,
+                       "Forwarding altered outgoing ICMPv6");
+       else
+               IP_VS_DBG_PKT(11, pp, skb, (void *)inner - (void *)outer,
+                       "Forwarding altered incoming ICMPv6");
+}
+#endif
+
+/*
+ * Handle relevant response ICMP messages - forward to the right
+ * destination host. Used for NAT and local client.
+ *
+ * The reference held on cp is dropped with __ip_vs_conn_put() on every
+ * path. Returns NF_ACCEPT once the packet has been rewritten, NF_DROP
+ * when the checksum is wrong or the skb cannot be made writable.
+ */
+static int handle_response_icmp(int af, struct sk_buff *skb,
+                               union nf_inet_addr *snet,
+                               __u8 protocol, struct ip_vs_conn *cp,
+                               struct ip_vs_protocol *pp,
+                               unsigned int offset, unsigned int ihl)
+{
+       unsigned int ret = NF_DROP;
+
+       if (IP_VS_FWD_METHOD(cp) != 0) {
+               IP_VS_ERR("shouldn't reach here, because the box is on the "
+                         "half connection in the tun/dr module.\n");
+       }
+
+       /* Ensure the checksum is correct */
+       if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
+               /* Failed checksum! */
+               IP_VS_DBG_BUF(1, "Forward ICMP: failed checksum from %s!\n",
+                             IP_VS_DBG_ADDR(af, snet));
+               goto put_conn;
+       }
+
+       /* the embedded TCP/UDP port pair must be writable as well */
+       switch (protocol) {
+       case IPPROTO_TCP:
+       case IPPROTO_UDP:
+               offset += 2 * sizeof(__u16);
+               break;
+       default:
+               break;
+       }
+
+       if (!skb_make_writable(skb, offset))
+               goto put_conn;
+
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6)
+               ip_vs_nat_icmp_v6(skb, pp, cp, 1);
+       else
+#endif
+               ip_vs_nat_icmp(skb, pp, cp, 1);
+
+       /* do the statistics and put it back */
+       ip_vs_out_stats(cp, skb);
+
+       skb->ipvs_property = 1;
+       ret = NF_ACCEPT;
+
+put_conn:
+       __ip_vs_conn_put(cp);
+
+       return ret;
+}
+
 /*
  *     Handle ICMP messages in the inside-to-outside direction (outgoing).
- *     Find any that might be relevant, check against existing connections,
- *     forward to the right destination host if relevant.
+ *     Find any that might be relevant, check against existing connections.
  *     Currently handles error types - unreachable, quench, ttl exceeded.
- *     (Only used in VS/NAT)
  */
 static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
 {
        struct iphdr *iph;
        struct icmphdr  _icmph, *ic;
        struct iphdr    _ciph, *cih;    /* The ip header contained within the ICMP */
+       struct ip_vs_iphdr ciph;
        struct ip_vs_conn *cp;
        struct ip_vs_protocol *pp;
-       unsigned int offset, ihl, verdict;
+       unsigned int offset, ihl;
+       union nf_inet_addr snet;
 
        *related = 1;
 
@@ -627,102 +766,231 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
 
        offset += cih->ihl * 4;
 
+       ip_vs_fill_iphdr(AF_INET, cih, &ciph);
        /* The embedded headers contain source and dest in reverse order */
-       cp = pp->conn_out_get(skb, pp, cih, offset, 1);
+       cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
        if (!cp)
                return NF_ACCEPT;
 
-       verdict = NF_DROP;
+       snet.ip = iph->saddr;
+       return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp,
+                                   pp, offset, ihl);
+}
 
-       if (IP_VS_FWD_METHOD(cp) != 0) {
-               IP_VS_ERR("shouldn't reach here, because the box is on the "
-                         "half connection in the tun/dr module.\n");
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ *     IPv6 counterpart of ip_vs_out_icmp(): handle ICMPv6 errors in the
+ *     inside-to-outside direction (outgoing).
+ *
+ *     *related is set to 1 on entry and cleared to 0 (with NF_ACCEPT) when
+ *     the ICMPv6 type is not destination unreachable, packet too big or
+ *     time exceeded.
+ *
+ *     Returns NF_STOLEN if ip_vs_gather_frags_v6() reports non-zero,
+ *     NF_DROP if the ICMPv6 header cannot be read, NF_ACCEPT if the embedded
+ *     header is unreadable or matches no known protocol or connection, and
+ *     otherwise the verdict of handle_response_icmp().
+ */
+static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
+{
+       struct ipv6hdr *iph;
+       struct icmp6hdr _icmph, *ic;
+       struct ipv6hdr  _ciph, *cih;    /* The ip header contained
+                                          within the ICMP */
+       struct ip_vs_iphdr ciph;
+       struct ip_vs_conn *cp;
+       struct ip_vs_protocol *pp;
+       unsigned int offset;
+       union nf_inet_addr snet;
+
+       *related = 1;
+
+       /* reassemble IP fragments */
+       if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
+               if (ip_vs_gather_frags_v6(skb, IP_DEFRAG_VS_OUT))
+                       return NF_STOLEN;
        }
 
-       /* Ensure the checksum is correct */
-       if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
-               /* Failed checksum! */
-               IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
-                         NIPQUAD(iph->saddr));
-               goto out;
+       iph = ipv6_hdr(skb);
+       /* NOTE(review): the ICMPv6 header is looked up right after the fixed
+        * IPv6 header, i.e. no extension headers are skipped - confirm this
+        * is intended */
+       offset = sizeof(struct ipv6hdr);
+       ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+       if (ic == NULL)
+               return NF_DROP;
+
+       IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n",
+                 ic->icmp6_type, ntohs(icmpv6_id(ic)),
+                 NIP6(iph->saddr), NIP6(iph->daddr));
+
+       /*
+        * Work through seeing if this is for us.
+        * These checks are supposed to be in an order that means easy
+        * things are checked first to speed up processing.... however
+        * this means that some packets will manage to get a long way
+        * down this stack and then be rejected, but that's life.
+        */
+       if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) &&
+           (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
+           (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
+               *related = 0;
+               return NF_ACCEPT;
        }
 
-       if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
-               offset += 2 * sizeof(__u16);
-       if (!skb_make_writable(skb, offset))
-               goto out;
+       /* Now find the contained IP header */
+       offset += sizeof(_icmph);
+       cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+       if (cih == NULL)
+               return NF_ACCEPT; /* The packet looks wrong, ignore */
 
-       ip_vs_nat_icmp(skb, pp, cp, 1);
+       pp = ip_vs_proto_get(cih->nexthdr);
+       if (!pp)
+               return NF_ACCEPT;
 
-       /* do the statistics and put it back */
-       ip_vs_out_stats(cp, skb);
+       /* Is the embedded protocol header present? */
+       /* TODO: we don't support fragmentation at the moment anyways */
+       if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
+               return NF_ACCEPT;
 
-       skb->ipvs_property = 1;
-       verdict = NF_ACCEPT;
+       IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMPv6 for");
 
-  out:
-       __ip_vs_conn_put(cp);
+       offset += sizeof(struct ipv6hdr);
 
-       return verdict;
+       ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
+       /* The embedded headers contain source and dest in reverse order */
+       cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+       if (!cp)
+               return NF_ACCEPT;
+
+       ipv6_addr_copy(&snet.in6, &iph->saddr);
+       return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp,
+                                   pp, offset, sizeof(struct ipv6hdr));
 }
+#endif
 
+/*
+ *     Return the RST flag of the TCP header read at offset nh_len in skb,
+ *     or 0 if that header cannot be read.
+ */
-static inline int is_tcp_reset(const struct sk_buff *skb)
+static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
 {
        struct tcphdr _tcph, *th;
 
-       th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
+       th = skb_header_pointer(skb, nh_len, sizeof(_tcph), &_tcph);
        if (th == NULL)
                return 0;
        return th->rst;
 }
 
+/*
+ *     Handle response packets: run the protocol's SNAT handler, put the
+ *     virtual address in as source and re-route.  Used for NAT and local
+ *     client.  Drops the reference on cp on every path; on failure the skb
+ *     is freed here and NF_STOLEN is returned, otherwise NF_ACCEPT.
+ */
+static unsigned int
+handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+               struct ip_vs_conn *cp, int ihl)
+{
+       int route_err;
+
+       IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
+
+       if (!skb_make_writable(skb, ihl))
+               goto drop;
+
+       /* mangle the packet */
+       if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
+               goto drop;
+
+       /*
+        * Set the virtual address as source, then re-compute the routing
+        * information: for policy routing, packets originating from this
+        * machine may be routed differently to packets passing through,
+        * and this packet should be routed as if it came from this machine.
+        */
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6) {
+               ipv6_hdr(skb)->saddr = cp->vaddr.in6;
+               route_err = ip6_route_me_harder(skb);
+       } else
+#endif
+       {
+               ip_hdr(skb)->saddr = cp->vaddr.ip;
+               ip_send_check(ip_hdr(skb));
+               route_err = ip_route_me_harder(skb, RTN_LOCAL);
+       }
+       if (route_err != 0)
+               goto drop;
+
+       IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
+
+       ip_vs_out_stats(cp, skb);
+       ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
+       ip_vs_conn_put(cp);
+
+       skb->ipvs_property = 1;
+
+       LeaveFunction(11);
+       return NF_ACCEPT;
+
+drop:
+       ip_vs_conn_put(cp);
+       kfree_skb(skb);
+       return NF_STOLEN;
+}
+
 /*
  *     It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
- *     Check if outgoing packet belongs to the established ip_vs_conn,
- *      rewrite addresses of the packet and send it on its way...
+ *     Check if outgoing packet belongs to the established ip_vs_conn.
  */
 static unsigned int
 ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
          const struct net_device *in, const struct net_device *out,
          int (*okfn)(struct sk_buff *))
 {
-       struct iphdr    *iph;
+       struct ip_vs_iphdr iph;
        struct ip_vs_protocol *pp;
        struct ip_vs_conn *cp;
-       int ihl;
+       int af;
 
        EnterFunction(11);
 
+       af = (skb->protocol == __constant_htons(ETH_P_IP)) ? AF_INET : AF_INET6;
+
        if (skb->ipvs_property)
                return NF_ACCEPT;
 
-       iph = ip_hdr(skb);
-       if (unlikely(iph->protocol == IPPROTO_ICMP)) {
-               int related, verdict = ip_vs_out_icmp(skb, &related);
+       ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6) {
+               if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
+                       int related, verdict = ip_vs_out_icmp_v6(skb, &related);
 
-               if (related)
-                       return verdict;
-               iph = ip_hdr(skb);
-       }
+                       if (related)
+                               return verdict;
+                       ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+               }
+       } else
+#endif
+               if (unlikely(iph.protocol == IPPROTO_ICMP)) {
+                       int related, verdict = ip_vs_out_icmp(skb, &related);
 
-       pp = ip_vs_proto_get(iph->protocol);
+                       if (related)
+                               return verdict;
+                       ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+               }
+
+       pp = ip_vs_proto_get(iph.protocol);
        if (unlikely(!pp))
                return NF_ACCEPT;
 
        /* reassemble IP fragments */
-       if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) &&
-                    !pp->dont_defrag)) {
-               if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
-                       return NF_STOLEN;
-               iph = ip_hdr(skb);
-       }
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6) {
+               if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
+                       int related, verdict = ip_vs_out_icmp_v6(skb, &related);
+
+                       if (related)
+                               return verdict;
 
-       ihl = iph->ihl << 2;
+                       ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+               }
+       } else
+#endif
+               if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) &&
+                            !pp->dont_defrag)) {
+                       if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
+                               return NF_STOLEN;
+
+                       ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+               }
 
        /*
         * Check if the packet belongs to an existing entry
         */
-       cp = pp->conn_out_get(skb, pp, iph, ihl, 0);
+       cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
 
        if (unlikely(!cp)) {
                if (sysctl_ip_vs_nat_icmp_send &&
@@ -730,21 +998,31 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
                     pp->protocol == IPPROTO_UDP)) {
                        __be16 _ports[2], *pptr;
 
-                       pptr = skb_header_pointer(skb, ihl,
+                       pptr = skb_header_pointer(skb, iph.len,
                                                  sizeof(_ports), _ports);
                        if (pptr == NULL)
                                return NF_ACCEPT;       /* Not for me */
-                       if (ip_vs_lookup_real_service(iph->protocol,
-                                                     iph->saddr, pptr[0])) {
+                       if (ip_vs_lookup_real_service(af, iph.protocol,
+                                                     &iph.saddr,
+                                                     pptr[0])) {
                                /*
                                 * Notify the real server: there is no
                                 * existing entry if it is not RST
                                 * packet or not TCP packet.
                                 */
-                               if (iph->protocol != IPPROTO_TCP
-                                   || !is_tcp_reset(skb)) {
-                                       icmp_send(skb,ICMP_DEST_UNREACH,
-                                                 ICMP_PORT_UNREACH, 0);
+                               if (iph.protocol != IPPROTO_TCP
+                                   || !is_tcp_reset(skb, iph.len)) {
+#ifdef CONFIG_IP_VS_IPV6
+                                       if (af == AF_INET6)
+                                               icmpv6_send(skb,
+                                                           ICMPV6_DEST_UNREACH,
+                                                           ICMPV6_PORT_UNREACH,
+                                                           0, skb->dev);
+                                       else
+#endif
+                                               icmp_send(skb,
+                                                         ICMP_DEST_UNREACH,
+                                                         ICMP_PORT_UNREACH, 0);
                                        return NF_DROP;
                                }
                        }
@@ -754,41 +1032,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
                return NF_ACCEPT;
        }
 
-       IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
-
-       if (!skb_make_writable(skb, ihl))
-               goto drop;
-
-       /* mangle the packet */
-       if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
-               goto drop;
-       ip_hdr(skb)->saddr = cp->vaddr;
-       ip_send_check(ip_hdr(skb));
-
-       /* For policy routing, packets originating from this
-        * machine itself may be routed differently to packets
-        * passing through.  We want this packet to be routed as
-        * if it came from this machine itself.  So re-compute
-        * the routing information.
-        */
-       if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
-               goto drop;
-
-       IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
-
-       ip_vs_out_stats(cp, skb);
-       ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
-       ip_vs_conn_put(cp);
-
-       skb->ipvs_property = 1;
-
-       LeaveFunction(11);
-       return NF_ACCEPT;
-
-  drop:
-       ip_vs_conn_put(cp);
-       kfree_skb(skb);
-       return NF_STOLEN;
+       return handle_response(af, skb, pp, cp, iph.len);
 }
 
 
@@ -804,9 +1048,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
        struct iphdr *iph;
        struct icmphdr  _icmph, *ic;
        struct iphdr    _ciph, *cih;    /* The ip header contained within the ICMP */
+       struct ip_vs_iphdr ciph;
        struct ip_vs_conn *cp;
        struct ip_vs_protocol *pp;
        unsigned int offset, ihl, verdict;
+       union nf_inet_addr snet;
 
        *related = 1;
 
@@ -860,10 +1106,20 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 
        offset += cih->ihl * 4;
 
+       ip_vs_fill_iphdr(AF_INET, cih, &ciph);
        /* The embedded headers contain source and dest in reverse order */
-       cp = pp->conn_in_get(skb, pp, cih, offset, 1);
-       if (!cp)
+       cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
+       if (!cp) {
+               /* The packet could also belong to a local client */
+               cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+               if (cp) {
+                       snet.ip = iph->saddr;
+                       return handle_response_icmp(AF_INET, skb, &snet,
+                                                   cih->protocol, cp, pp,
+                                                   offset, ihl);
+               }
                return NF_ACCEPT;
+       }
 
        verdict = NF_DROP;
 
@@ -888,6 +1144,105 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
        return verdict;
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ *     Handle ICMPv6 errors arriving for IPVS connections (IPv6 variant of
+ *     ip_vs_in_icmp).  *related is cleared when the ICMPv6 type is not one
+ *     of destination unreachable / packet too big / time exceeded.
+ */
+static int
+ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
+{
+       struct ipv6hdr *iph;
+       struct icmp6hdr _icmph, *ic;
+       struct ipv6hdr  _ciph, *cih;    /* IPv6 header inside the ICMPv6 */
+       struct ip_vs_iphdr ciph;
+       struct ip_vs_conn *cp;
+       struct ip_vs_protocol *pp;
+       unsigned int offset, verdict;
+       union nf_inet_addr snet;
+
+       *related = 1;
+
+       /* reassemble IP fragments */
+       if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT &&
+           ip_vs_gather_frags_v6(skb, hooknum == NF_INET_LOCAL_IN ?
+                                      IP_DEFRAG_VS_IN :
+                                      IP_DEFRAG_VS_FWD))
+               return NF_STOLEN;
+
+       iph = ipv6_hdr(skb);
+       offset = sizeof(struct ipv6hdr);
+       ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+       if (!ic)
+               return NF_DROP;
+
+       IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n",
+                 ic->icmp6_type, ntohs(icmpv6_id(ic)),
+                 NIP6(iph->saddr), NIP6(iph->daddr));
+
+       /*
+        * Work through seeing if this is for us, cheapest checks first.
+        * Only these error types are treated as related to a connection.
+        */
+       switch (ic->icmp6_type) {
+       case ICMPV6_DEST_UNREACH:
+       case ICMPV6_PKT_TOOBIG:
+       case ICMPV6_TIME_EXCEED:
+               break;
+       default:
+               *related = 0;
+               return NF_ACCEPT;
+       }
+
+       /* Now find the contained IP header */
+       offset += sizeof(_icmph);
+       cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+       if (!cih)
+               return NF_ACCEPT; /* The packet looks wrong, ignore */
+
+       pp = ip_vs_proto_get(cih->nexthdr);
+       if (!pp)
+               return NF_ACCEPT;
+
+       /* Is the embedded protocol header present? */
+       /* TODO: we don't support fragmentation at the moment anyways */
+       if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
+               return NF_ACCEPT;
+
+       IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMPv6 for");
+
+       offset += sizeof(struct ipv6hdr);
+
+       ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
+       /* The embedded headers contain source and dest in reverse order */
+       cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1);
+       if (!cp) {
+               /* The packet could also belong to a local client */
+               cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+               if (!cp)
+                       return NF_ACCEPT;
+
+               ipv6_addr_copy(&snet.in6, &iph->saddr);
+               return handle_response_icmp(AF_INET6, skb, &snet,
+                                           cih->nexthdr, cp, pp, offset,
+                                           sizeof(struct ipv6hdr));
+       }
+
+       /* do the statistics and put it back */
+       ip_vs_in_stats(cp, skb);
+       if (cih->nexthdr == IPPROTO_TCP || cih->nexthdr == IPPROTO_UDP)
+               offset += 2 * sizeof(__u16);
+       verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset);
+       /* do not touch skb anymore */
+
+       __ip_vs_conn_put(cp);
+
+       return verdict;
+}
+#endif
+
+
 /*
  *     Check if it's for virtual services, look it up,
  *     and send it on its way...
@@ -897,50 +1252,54 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
         const struct net_device *in, const struct net_device *out,
         int (*okfn)(struct sk_buff *))
 {
-       struct iphdr    *iph;
+       struct ip_vs_iphdr iph;
        struct ip_vs_protocol *pp;
        struct ip_vs_conn *cp;
-       int ret, restart;
-       int ihl;
+       int ret, restart, af;
+
+       af = (skb->protocol == __constant_htons(ETH_P_IP)) ? AF_INET : AF_INET6;
+
+       ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 
        /*
-        *      Big tappo: only PACKET_HOST (neither loopback nor mcasts)
-        *      ... don't know why 1st test DOES NOT include 2nd (?)
+        *      Big tappo: only PACKET_HOST, including loopback for local client
+        *      Don't handle local packets on IPv6 for now
         */
-       if (unlikely(skb->pkt_type != PACKET_HOST
-                    || skb->dev->flags & IFF_LOOPBACK || skb->sk)) {
-               IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
-                         skb->pkt_type,
-                         ip_hdr(skb)->protocol,
-                         NIPQUAD(ip_hdr(skb)->daddr));
+       if (unlikely(skb->pkt_type != PACKET_HOST)) {
+               IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n",
+                             skb->pkt_type,
+                             iph.protocol,
+                             IP_VS_DBG_ADDR(af, &iph.daddr));
                return NF_ACCEPT;
        }
 
-       iph = ip_hdr(skb);
-       if (unlikely(iph->protocol == IPPROTO_ICMP)) {
+       if (unlikely(iph.protocol == IPPROTO_ICMP)) {
                int related, verdict = ip_vs_in_icmp(skb, &related, hooknum);
 
                if (related)
                        return verdict;
-               iph = ip_hdr(skb);
+               ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
        }
 
        /* Protocol supported? */
-       pp = ip_vs_proto_get(iph->protocol);
+       pp = ip_vs_proto_get(iph.protocol);
        if (unlikely(!pp))
                return NF_ACCEPT;
 
-       ihl = iph->ihl << 2;
-
        /*
         * Check if the packet belongs to an existing connection entry
         */
-       cp = pp->conn_in_get(skb, pp, iph, ihl, 0);
+       cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0);
 
        if (unlikely(!cp)) {
                int v;
 
-               if (!pp->conn_schedule(skb, pp, &v, &cp))
+               /* For local client packets, it could be a response */
+               cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
+               if (cp)
+                       return handle_response(af, skb, pp, cp, iph.len);
+
+               if (!pp->conn_schedule(af, skb, pp, &v, &cp))
                        return v;
        }
 
@@ -984,7 +1343,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
         * encorage the standby servers to update the connections timeout
         */
        atomic_inc(&cp->in_pkts);
-       if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+       if (af == AF_INET &&
+           (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
            (((cp->protocol != IPPROTO_TCP ||
               cp->state == IP_VS_TCP_S_ESTABLISHED) &&
              (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
@@ -1023,6 +1383,21 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
        return ip_vs_in_icmp(skb, &r, hooknum);
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+static unsigned int
+ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
+                     const struct net_device *in, const struct net_device *out,
+                     int (*okfn)(struct sk_buff *))
+{
+       int related;    /* required by ip_vs_in_icmp_v6(), unused here */
+
+       if (ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6)
+               return ip_vs_in_icmp_v6(skb, &related, hooknum);
+
+       return NF_ACCEPT;
+}
+#endif
+
 
 static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
        /* After packet filtering, forward packet through VS/DR, VS/TUN,
@@ -1060,6 +1435,43 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
                .hooknum        = NF_INET_POST_ROUTING,
                .priority       = NF_IP_PRI_NAT_SRC-1,
        },
+#ifdef CONFIG_IP_VS_IPV6
+       /* After packet filtering, forward packet through VS/DR, VS/TUN,
+        * or VS/NAT(change destination), so that filtering rules can be
+        * applied to IPVS. */
+       {
+               .hook           = ip_vs_in,
+               .owner          = THIS_MODULE,
+               .pf             = PF_INET6,
+               .hooknum        = NF_INET_LOCAL_IN,
+               .priority       = 100,
+       },
+       /* After packet filtering, change source only for VS/NAT */
+       {
+               .hook           = ip_vs_out,
+               .owner          = THIS_MODULE,
+               .pf             = PF_INET6,
+               .hooknum        = NF_INET_FORWARD,
+               .priority       = 100,
+       },
+       /* After packet filtering (but before ip_vs_out_icmp_v6), catch ICMPv6
+        * destined for ::/0, which is for incoming IPVS connections */
+       {
+               .hook           = ip_vs_forward_icmp_v6,
+               .owner          = THIS_MODULE,
+               .pf             = PF_INET6,
+               .hooknum        = NF_INET_FORWARD,
+               .priority       = 99,
+       },
+       /* Before the netfilter connection tracking, exit from POST_ROUTING */
+       {
+               .hook           = ip_vs_post_routing,
+               .owner          = THIS_MODULE,
+               .pf             = PF_INET6,
+               .hooknum        = NF_INET_POST_ROUTING,
+               .priority       = NF_IP6_PRI_NAT_SRC-1,
+       },
+#endif
 };
 
 
index ede101e..993a83f 100644 (file)
 
 #include <net/net_namespace.h>
 #include <net/ip.h>
+#ifdef CONFIG_IP_VS_IPV6
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#endif
 #include <net/route.h>
 #include <net/sock.h>
 #include <net/genetlink.h>
@@ -91,6 +95,26 @@ int ip_vs_get_debug_level(void)
 }
 #endif
 
+#ifdef CONFIG_IP_VS_IPV6
+/* Return 1 if the IPv6 route to @addr leaves via a loopback device, else 0
+ * (approach taken from rt6_fill_node() in net/ipv6/route.c).  The dst from
+ * ip6_route_output() is held, so release it; an error dst is never local.
+ */
+static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
+{
+       struct flowi fl = { .nl_u = { .ip6_u = { .daddr = *addr } } };
+       struct dst_entry *dst = ip6_route_output(&init_net, NULL, &fl);
+       int is_local;
+
+       if (!dst)
+               return 0;
+
+       is_local = !dst->error && dst->dev &&
+                  (dst->dev->flags & IFF_LOOPBACK);
+       dst_release(dst);
+       return is_local;
+}
+#endif
 /*
  *     update_defense_level is called from keventd and from sysctl,
  *     so it needs to protect itself from softirqs
@@ -282,11 +306,19 @@ static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
  *     Returns hash value for virtual service
  */
 static __inline__ unsigned
-ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port)
+ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
+                 __be16 port)
 {
        register unsigned porth = ntohs(port);
+       __be32 addr_fold = addr->ip;
+
+#ifdef CONFIG_IP_VS_IPV6
+       /* IPv6: XOR the four 32-bit words of the address into one word so
+        * the same hash expression as for IPv4 can be used below */
+       if (af == AF_INET6)
+               addr_fold = addr->ip6[0]^addr->ip6[1]^
+                           addr->ip6[2]^addr->ip6[3];
+#endif
 
-       return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
+       return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
                & IP_VS_SVC_TAB_MASK;
 }
 
@@ -317,7 +349,8 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
                /*
                 *  Hash it by <protocol,addr,port> in ip_vs_svc_table
                 */
-               hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
+               hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
+                                        svc->port);
                list_add(&svc->s_list, &ip_vs_svc_table[hash]);
        } else {
                /*
@@ -363,17 +396,19 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
 /*
  *     Get service by {proto,addr,port} in the service table.
  */
-static __inline__ struct ip_vs_service *
-__ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport)
+static inline struct ip_vs_service *
+__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
+                   __be16 vport)
 {
        unsigned hash;
        struct ip_vs_service *svc;
 
        /* Check for "full" addressed entries */
-       hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
+       hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
 
        list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
-               if ((svc->addr == vaddr)
+               if ((svc->af == af)
+                   && ip_vs_addr_equal(af, &svc->addr, vaddr)
                    && (svc->port == vport)
                    && (svc->protocol == protocol)) {
                        /* HIT */
@@ -389,7 +424,8 @@ __ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport)
 /*
  *     Get service by {fwmark} in the service table.
  */
-static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
+static inline struct ip_vs_service *
+__ip_vs_svc_fwm_get(int af, __u32 fwmark)
 {
        unsigned hash;
        struct ip_vs_service *svc;
@@ -398,7 +434,7 @@ static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
        hash = ip_vs_svc_fwm_hashkey(fwmark);
 
        list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
-               if (svc->fwmark == fwmark) {
+               if (svc->fwmark == fwmark && svc->af == af) {
                        /* HIT */
                        atomic_inc(&svc->usecnt);
                        return svc;
@@ -409,7 +445,8 @@ static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
 }
 
 struct ip_vs_service *
-ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
+ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+                 const union nf_inet_addr *vaddr, __be16 vport)
 {
        struct ip_vs_service *svc;
 
@@ -418,14 +455,14 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
        /*
         *      Check the table hashed by fwmark first
         */
-       if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
+       if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
                goto out;
 
        /*
         *      Check the table hashed by <protocol,addr,port>
         *      for "full" addressed entries
         */
-       svc = __ip_vs_service_get(protocol, vaddr, vport);
+       svc = __ip_vs_service_get(af, protocol, vaddr, vport);
 
        if (svc == NULL
            && protocol == IPPROTO_TCP
@@ -435,7 +472,7 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
                 * Check if ftp service entry exists, the packet
                 * might belong to FTP data connections.
                 */
-               svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
+               svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
        }
 
        if (svc == NULL
@@ -443,16 +480,16 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
                /*
                 * Check if the catch-all port (port zero) exists
                 */
-               svc = __ip_vs_service_get(protocol, vaddr, 0);
+               svc = __ip_vs_service_get(af, protocol, vaddr, 0);
        }
 
   out:
        read_unlock(&__ip_vs_svc_lock);
 
-       IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
-                 fwmark, ip_vs_proto_name(protocol),
-                 NIPQUAD(vaddr), ntohs(vport),
-                 svc?"hit":"not hit");
+       IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
+                     fwmark, ip_vs_proto_name(protocol),
+                     IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
+                     svc ? "hit" : "not hit");
 
        return svc;
 }
@@ -479,11 +516,20 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)
 /*
  *     Returns hash value for real service
  */
-static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port)
+static inline unsigned ip_vs_rs_hashkey(int af,
+                                           const union nf_inet_addr *addr,
+                                           __be16 port)
 {
        register unsigned porth = ntohs(port);
+       __be32 addr_fold = addr->ip;
+
+#ifdef CONFIG_IP_VS_IPV6
+       /* IPv6: XOR the four 32-bit words of the address into one word so
+        * the same hash expression as for IPv4 can be used below */
+       if (af == AF_INET6)
+               addr_fold = addr->ip6[0]^addr->ip6[1]^
+                           addr->ip6[2]^addr->ip6[3];
+#endif
 
-       return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
+       return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
                & IP_VS_RTAB_MASK;
 }
 
@@ -503,7 +549,8 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest)
         *      Hash by proto,addr,port,
         *      which are the parameters of the real service.
         */
-       hash = ip_vs_rs_hashkey(dest->addr, dest->port);
+       hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
+
        list_add(&dest->d_list, &ip_vs_rtable[hash]);
 
        return 1;
@@ -530,7 +577,9 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
  *     Lookup real service by <proto,addr,port> in the real service table.
  */
 struct ip_vs_dest *
-ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
+ip_vs_lookup_real_service(int af, __u16 protocol,
+                         const union nf_inet_addr *daddr,
+                         __be16 dport)
 {
        unsigned hash;
        struct ip_vs_dest *dest;
@@ -539,11 +588,12 @@ ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
         *      Check for "full" addressed entries
         *      Return the first found entry
         */
-       hash = ip_vs_rs_hashkey(daddr, dport);
+       hash = ip_vs_rs_hashkey(af, daddr, dport);
 
        read_lock(&__ip_vs_rs_lock);
        list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
-               if ((dest->addr == daddr)
+               if ((dest->af == af)
+                   && ip_vs_addr_equal(af, &dest->addr, daddr)
                    && (dest->port == dport)
                    && ((dest->protocol == protocol) ||
                        dest->vfwmark)) {
@@ -561,7 +611,8 @@ ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
  *     Lookup destination by {addr,port} in the given service
  */
 static struct ip_vs_dest *
-ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
+ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
+                 __be16 dport)
 {
        struct ip_vs_dest *dest;
 
@@ -569,7 +620,9 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
         * Find the destination for the given service
         */
        list_for_each_entry(dest, &svc->destinations, n_list) {
-               if ((dest->addr == daddr) && (dest->port == dport)) {
+               if ((dest->af == svc->af)
+                   && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
+                   && (dest->port == dport)) {
                        /* HIT */
                        return dest;
                }
@@ -588,13 +641,15 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
  * ip_vs_lookup_real_service() looked promissing, but
  * seems not working as expected.
  */
-struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
-                                   __be32 vaddr, __be16 vport, __u16 protocol)
+struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
+                                  __be16 dport,
+                                  const union nf_inet_addr *vaddr,
+                                  __be16 vport, __u16 protocol)
 {
        struct ip_vs_dest *dest;
        struct ip_vs_service *svc;
 
-       svc = ip_vs_service_get(0, protocol, vaddr, vport);
+       svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
        if (!svc)
                return NULL;
        dest = ip_vs_lookup_dest(svc, daddr, dport);
@@ -615,7 +670,8 @@ struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
  *  scheduling.
  */
 static struct ip_vs_dest *
-ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
+ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
+                    __be16 dport)
 {
        struct ip_vs_dest *dest, *nxt;
 
@@ -623,17 +679,19 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
         * Find the destination in trash
         */
        list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
-               IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
-                         "dest->refcnt=%d\n",
-                         dest->vfwmark,
-                         NIPQUAD(dest->addr), ntohs(dest->port),
-                         atomic_read(&dest->refcnt));
-               if (dest->addr == daddr &&
+               IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
+                             "dest->refcnt=%d\n",
+                             dest->vfwmark,
+                             IP_VS_DBG_ADDR(svc->af, &dest->addr),
+                             ntohs(dest->port),
+                             atomic_read(&dest->refcnt));
+               if (dest->af == svc->af &&
+                   ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
                    dest->port == dport &&
                    dest->vfwmark == svc->fwmark &&
                    dest->protocol == svc->protocol &&
                    (svc->fwmark ||
-                    (dest->vaddr == svc->addr &&
+                    (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
                      dest->vport == svc->port))) {
                        /* HIT */
                        return dest;
@@ -643,10 +701,11 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
                 * Try to purge the destination from trash if not referenced
                 */
                if (atomic_read(&dest->refcnt) == 1) {
-                       IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
-                                 "from trash\n",
-                                 dest->vfwmark,
-                                 NIPQUAD(dest->addr), ntohs(dest->port));
+                       IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
+                                     "from trash\n",
+                                     dest->vfwmark,
+                                     IP_VS_DBG_ADDR(svc->af, &dest->addr),
+                                     ntohs(dest->port));
                        list_del(&dest->n_list);
                        ip_vs_dst_reset(dest);
                        __ip_vs_unbind_svc(dest);
@@ -685,18 +744,7 @@ ip_vs_zero_stats(struct ip_vs_stats *stats)
 {
        spin_lock_bh(&stats->lock);
 
-       stats->conns = 0;
-       stats->inpkts = 0;
-       stats->outpkts = 0;
-       stats->inbytes = 0;
-       stats->outbytes = 0;
-
-       stats->cps = 0;
-       stats->inpps = 0;
-       stats->outpps = 0;
-       stats->inbps = 0;
-       stats->outbps = 0;
-
+       memset(&stats->ustats, 0, sizeof(stats->ustats));
        ip_vs_zero_estimator(stats);
 
        spin_unlock_bh(&stats->lock);
@@ -707,7 +755,7 @@ ip_vs_zero_stats(struct ip_vs_stats *stats)
  */
 static void
 __ip_vs_update_dest(struct ip_vs_service *svc,
-                   struct ip_vs_dest *dest, struct ip_vs_dest_user *udest)
+                   struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
 {
        int conn_flags;
 
@@ -716,10 +764,18 @@ __ip_vs_update_dest(struct ip_vs_service *svc,
        conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
 
        /* check if local node and update the flags */
-       if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) {
-               conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
-                       | IP_VS_CONN_F_LOCALNODE;
-       }
+#ifdef CONFIG_IP_VS_IPV6
+       if (svc->af == AF_INET6) {
+               if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
+                       conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
+                               | IP_VS_CONN_F_LOCALNODE;
+               }
+       } else
+#endif
+               if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
+                       conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
+                               | IP_VS_CONN_F_LOCALNODE;
+               }
 
        /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
        if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
@@ -760,7 +816,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc,
  *     Create a destination for the given service
  */
 static int
-ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
+ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
               struct ip_vs_dest **dest_p)
 {
        struct ip_vs_dest *dest;
@@ -768,9 +824,20 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
 
        EnterFunction(2);
 
-       atype = inet_addr_type(&init_net, udest->addr);
-       if (atype != RTN_LOCAL && atype != RTN_UNICAST)
-               return -EINVAL;
+#ifdef CONFIG_IP_VS_IPV6
+       if (svc->af == AF_INET6) {
+               atype = ipv6_addr_type(&udest->addr.in6);
+               if ((!(atype & IPV6_ADDR_UNICAST) ||
+                       atype & IPV6_ADDR_LINKLOCAL) &&
+                       !__ip_vs_addr_is_local_v6(&udest->addr.in6))
+                       return -EINVAL;
+       } else
+#endif
+       {
+               atype = inet_addr_type(&init_net, udest->addr.ip);
+               if (atype != RTN_LOCAL && atype != RTN_UNICAST)
+                       return -EINVAL;
+       }
 
        dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
        if (dest == NULL) {
@@ -778,11 +845,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
                return -ENOMEM;
        }
 
+       dest->af = svc->af;
        dest->protocol = svc->protocol;
        dest->vaddr = svc->addr;
        dest->vport = svc->port;
        dest->vfwmark = svc->fwmark;
-       dest->addr = udest->addr;
+       ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
        dest->port = udest->port;
 
        atomic_set(&dest->activeconns, 0);
@@ -807,10 +875,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
  *     Add a destination into an existing service
  */
 static int
-ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
+ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 {
        struct ip_vs_dest *dest;
-       __be32 daddr = udest->addr;
+       union nf_inet_addr daddr;
        __be16 dport = udest->port;
        int ret;
 
@@ -827,10 +895,13 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
                return -ERANGE;
        }
 
+       ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+
        /*
         * Check if the dest already exists in the list
         */
-       dest = ip_vs_lookup_dest(svc, daddr, dport);
+       dest = ip_vs_lookup_dest(svc, &daddr, dport);
+
        if (dest != NULL) {
                IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
                return -EEXIST;
@@ -840,15 +911,17 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
         * Check if the dest already exists in the trash and
         * is from the same service
         */
-       dest = ip_vs_trash_get_dest(svc, daddr, dport);
+       dest = ip_vs_trash_get_dest(svc, &daddr, dport);
+
        if (dest != NULL) {
-               IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
-                         "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
-                         NIPQUAD(daddr), ntohs(dport),
-                         atomic_read(&dest->refcnt),
-                         dest->vfwmark,
-                         NIPQUAD(dest->vaddr),
-                         ntohs(dest->vport));
+               IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
+                             "dest->refcnt=%d, service %u/%s:%u\n",
+                             IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
+                             atomic_read(&dest->refcnt),
+                             dest->vfwmark,
+                             IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
+                             ntohs(dest->vport));
+
                __ip_vs_update_dest(svc, dest, udest);
 
                /*
@@ -915,10 +988,10 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
  *     Edit a destination in the given service
  */
 static int
-ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
+ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 {
        struct ip_vs_dest *dest;
-       __be32 daddr = udest->addr;
+       union nf_inet_addr daddr;
        __be16 dport = udest->port;
 
        EnterFunction(2);
@@ -934,10 +1007,13 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
                return -ERANGE;
        }
 
+       ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+
        /*
         *  Lookup the destination list
         */
-       dest = ip_vs_lookup_dest(svc, daddr, dport);
+       dest = ip_vs_lookup_dest(svc, &daddr, dport);
+
        if (dest == NULL) {
                IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
                return -ENOENT;
@@ -991,10 +1067,11 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
                atomic_dec(&dest->svc->refcnt);
                kfree(dest);
        } else {
-               IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
-                         "dest->refcnt=%d\n",
-                         NIPQUAD(dest->addr), ntohs(dest->port),
-                         atomic_read(&dest->refcnt));
+               IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
+                             "dest->refcnt=%d\n",
+                             IP_VS_DBG_ADDR(dest->af, &dest->addr),
+                             ntohs(dest->port),
+                             atomic_read(&dest->refcnt));
                list_add(&dest->n_list, &ip_vs_dest_trash);
                atomic_inc(&dest->refcnt);
        }
@@ -1028,15 +1105,15 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
  *     Delete a destination server in the given service
  */
 static int
-ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
+ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 {
        struct ip_vs_dest *dest;
-       __be32 daddr = udest->addr;
        __be16 dport = udest->port;
 
        EnterFunction(2);
 
-       dest = ip_vs_lookup_dest(svc, daddr, dport);
+       dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
+
        if (dest == NULL) {
                IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
                return -ENOENT;
@@ -1071,7 +1148,8 @@ ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
  *     Add a service into the service hash table
  */
 static int
-ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
+ip_vs_add_service(struct ip_vs_service_user_kern *u,
+                 struct ip_vs_service **svc_p)
 {
        int ret = 0;
        struct ip_vs_scheduler *sched = NULL;
@@ -1089,6 +1167,19 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
                goto out_mod_dec;
        }
 
+#ifdef CONFIG_IP_VS_IPV6
+       if (u->af == AF_INET6) {
+               if (!sched->supports_ipv6) {
+                       ret = -EAFNOSUPPORT;
+                       goto out_err;
+               }
+               if ((u->netmask < 1) || (u->netmask > 128)) {
+                       ret = -EINVAL;
+                       goto out_err;
+               }
+       }
+#endif
+
        svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
        if (svc == NULL) {
                IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
@@ -1100,8 +1191,9 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
        atomic_set(&svc->usecnt, 1);
        atomic_set(&svc->refcnt, 0);
 
+       svc->af = u->af;
        svc->protocol = u->protocol;
-       svc->addr = u->addr;
+       ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
        svc->port = u->port;
        svc->fwmark = u->fwmark;
        svc->flags = u->flags;
@@ -1125,7 +1217,10 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
                atomic_inc(&ip_vs_nullsvc_counter);
 
        ip_vs_new_estimator(&svc->stats);
-       ip_vs_num_services++;
+
+       /* Count only IPv4 services for old get/setsockopt interface */
+       if (svc->af == AF_INET)
+               ip_vs_num_services++;
 
        /* Hash the service into the service table */
        write_lock_bh(&__ip_vs_svc_lock);
@@ -1160,7 +1255,7 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
  *     Edit a service and bind it with a new scheduler
  */
 static int
-ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u)
+ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
 {
        struct ip_vs_scheduler *sched, *old_sched;
        int ret = 0;
@@ -1176,6 +1271,19 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u)
        }
        old_sched = sched;
 
+#ifdef CONFIG_IP_VS_IPV6
+       if (u->af == AF_INET6) {
+               if (!sched->supports_ipv6) {
+                       ret = -EAFNOSUPPORT;
+                       goto out;
+               }
+               if ((u->netmask < 1) || (u->netmask > 128)) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+       }
+#endif
+
        write_lock_bh(&__ip_vs_svc_lock);
 
        /*
@@ -1240,7 +1348,10 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
        struct ip_vs_dest *dest, *nxt;
        struct ip_vs_scheduler *old_sched;
 
-       ip_vs_num_services--;
+       /* Count only IPv4 services for old get/setsockopt interface */
+       if (svc->af == AF_INET)
+               ip_vs_num_services--;
+
        ip_vs_kill_estimator(&svc->stats);
 
        /* Unbind scheduler */
@@ -1748,15 +1859,25 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
                const struct ip_vs_iter *iter = seq->private;
                const struct ip_vs_dest *dest;
 
-               if (iter->table == ip_vs_svc_table)
-                       seq_printf(seq, "%s  %08X:%04X %s ",
-                                  ip_vs_proto_name(svc->protocol),
-                                  ntohl(svc->addr),
-                                  ntohs(svc->port),
-                                  svc->scheduler->name);
-               else
+               if (iter->table == ip_vs_svc_table) {
+#ifdef CONFIG_IP_VS_IPV6
+                       if (svc->af == AF_INET6)
+                               seq_printf(seq, "%s  [" NIP6_FMT "]:%04X %s ",
+                                          ip_vs_proto_name(svc->protocol),
+                                          NIP6(svc->addr.in6),
+                                          ntohs(svc->port),
+                                          svc->scheduler->name);
+                       else
+#endif
+                               seq_printf(seq, "%s  %08X:%04X %s ",
+                                          ip_vs_proto_name(svc->protocol),
+                                          ntohl(svc->addr.ip),
+                                          ntohs(svc->port),
+                                          svc->scheduler->name);
+               } else {
                        seq_printf(seq, "FWM  %08X %s ",
                                   svc->fwmark, svc->scheduler->name);
+               }
 
                if (svc->flags & IP_VS_SVC_F_PERSISTENT)
                        seq_printf(seq, "persistent %d %08X\n",
@@ -1766,13 +1887,29 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
                        seq_putc(seq, '\n');
 
                list_for_each_entry(dest, &svc->destinations, n_list) {
-                       seq_printf(seq,
-                                  "  -> %08X:%04X      %-7s %-6d %-10d %-10d\n",
-                                  ntohl(dest->addr), ntohs(dest->port),
-                                  ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
-                                  atomic_read(&dest->weight),
-                                  atomic_read(&dest->activeconns),
-                                  atomic_read(&dest->inactconns));
+#ifdef CONFIG_IP_VS_IPV6
+                       if (dest->af == AF_INET6)
+                               seq_printf(seq,
+                                          "  -> [" NIP6_FMT "]:%04X"
+                                          "      %-7s %-6d %-10d %-10d\n",
+                                          NIP6(dest->addr.in6),
+                                          ntohs(dest->port),
+                                          ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
+                                          atomic_read(&dest->weight),
+                                          atomic_read(&dest->activeconns),
+                                          atomic_read(&dest->inactconns));
+                       else
+#endif
+                               seq_printf(seq,
+                                          "  -> %08X:%04X      "
+                                          "%-7s %-6d %-10d %-10d\n",
+                                          ntohl(dest->addr.ip),
+                                          ntohs(dest->port),
+                                          ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
+                                          atomic_read(&dest->weight),
+                                          atomic_read(&dest->activeconns),
+                                          atomic_read(&dest->inactconns));
+
                }
        }
        return 0;
@@ -1816,20 +1953,20 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
                   "   Conns  Packets  Packets            Bytes            Bytes\n");
 
        spin_lock_bh(&ip_vs_stats.lock);
-       seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
-                  ip_vs_stats.inpkts, ip_vs_stats.outpkts,
-                  (unsigned long long) ip_vs_stats.inbytes,
-                  (unsigned long long) ip_vs_stats.outbytes);
+       seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
+                  ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
+                  (unsigned long long) ip_vs_stats.ustats.inbytes,
+                  (unsigned long long) ip_vs_stats.ustats.outbytes);
 
 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
        seq_puts(seq,
                   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
        seq_printf(seq,"%8X %8X %8X %16X %16X\n",
-                       ip_vs_stats.cps,
-                       ip_vs_stats.inpps,
-                       ip_vs_stats.outpps,
-                       ip_vs_stats.inbps,
-                       ip_vs_stats.outbps);
+                       ip_vs_stats.ustats.cps,
+                       ip_vs_stats.ustats.inpps,
+                       ip_vs_stats.ustats.outpps,
+                       ip_vs_stats.ustats.inbps,
+                       ip_vs_stats.ustats.outbps);
        spin_unlock_bh(&ip_vs_stats.lock);
 
        return 0;
@@ -1904,14 +2041,44 @@ static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
        [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
 };
 
+static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
+                                 struct ip_vs_service_user *usvc_compat)
+{
+       usvc->af                = AF_INET;
+       usvc->protocol          = usvc_compat->protocol;
+       usvc->addr.ip           = usvc_compat->addr;
+       usvc->port              = usvc_compat->port;
+       usvc->fwmark            = usvc_compat->fwmark;
+
+       /* Deep copy of sched_name is not needed here */
+       usvc->sched_name        = usvc_compat->sched_name;
+
+       usvc->flags             = usvc_compat->flags;
+       usvc->timeout           = usvc_compat->timeout;
+       usvc->netmask           = usvc_compat->netmask;
+}
+
+static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
+                                  struct ip_vs_dest_user *udest_compat)
+{
+       udest->addr.ip          = udest_compat->addr;
+       udest->port             = udest_compat->port;
+       udest->conn_flags       = udest_compat->conn_flags;
+       udest->weight           = udest_compat->weight;
+       udest->u_threshold      = udest_compat->u_threshold;
+       udest->l_threshold      = udest_compat->l_threshold;
+}
+
 static int
 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 {
        int ret;
        unsigned char arg[MAX_ARG_LEN];
-       struct ip_vs_service_user *usvc;
+       struct ip_vs_service_user *usvc_compat;
+       struct ip_vs_service_user_kern usvc;
        struct ip_vs_service *svc;
-       struct ip_vs_dest_user *udest;
+       struct ip_vs_dest_user *udest_compat;
+       struct ip_vs_dest_user_kern udest;
 
        if (!capable(CAP_NET_ADMIN))
                return -EPERM;
@@ -1951,35 +2118,40 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
                goto out_unlock;
        }
 
-       usvc = (struct ip_vs_service_user *)arg;
-       udest = (struct ip_vs_dest_user *)(usvc + 1);
+       usvc_compat = (struct ip_vs_service_user *)arg;
+       udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
+
+       /* We only use the new structs internally, so copy userspace compat
+        * structs to extended internal versions */
+       ip_vs_copy_usvc_compat(&usvc, usvc_compat);
+       ip_vs_copy_udest_compat(&udest, udest_compat);
 
        if (cmd == IP_VS_SO_SET_ZERO) {
                /* if no service address is set, zero counters in all */
-               if (!usvc->fwmark && !usvc->addr && !usvc->port) {
+               if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
                        ret = ip_vs_zero_all();
                        goto out_unlock;
                }
        }
 
        /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
-       if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) {
+       if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
                IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
-                         usvc->protocol, NIPQUAD(usvc->addr),
-                         ntohs(usvc->port), usvc->sched_name);
+                         usvc.protocol, NIPQUAD(usvc.addr.ip),
+                         ntohs(usvc.port), usvc.sched_name);
                ret = -EFAULT;
                goto out_unlock;
        }
 
        /* Lookup the exact service by <protocol, addr, port> or fwmark */
-       if (usvc->fwmark == 0)
-               svc = __ip_vs_service_get(usvc->protocol,
-                                         usvc->addr, usvc->port);
+       if (usvc.fwmark == 0)
+               svc = __ip_vs_service_get(usvc.af, usvc.protocol,
+                                         &usvc.addr, usvc.port);
        else
-               svc = __ip_vs_svc_fwm_get(usvc->fwmark);
+               svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
 
        if (cmd != IP_VS_SO_SET_ADD
-           && (svc == NULL || svc->protocol != usvc->protocol)) {
+           && (svc == NULL || svc->protocol != usvc.protocol)) {
                ret = -ESRCH;
                goto out_unlock;
        }
@@ -1989,10 +2161,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
                if (svc != NULL)
                        ret = -EEXIST;
                else
-                       ret = ip_vs_add_service(usvc, &svc);
+                       ret = ip_vs_add_service(&usvc, &svc);
                break;
        case IP_VS_SO_SET_EDIT:
-               ret = ip_vs_edit_service(svc, usvc);
+               ret = ip_vs_edit_service(svc, &usvc);
                break;
        case IP_VS_SO_SET_DEL:
                ret = ip_vs_del_service(svc);
@@ -2003,13 +2175,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
                ret = ip_vs_zero_service(svc);
                break;
        case IP_VS_SO_SET_ADDDEST:
-               ret = ip_vs_add_dest(svc, udest);
+               ret = ip_vs_add_dest(svc, &udest);
                break;
        case IP_VS_SO_SET_EDITDEST:
-               ret = ip_vs_edit_dest(svc, udest);
+               ret = ip_vs_edit_dest(svc, &udest);
                break;
        case IP_VS_SO_SET_DELDEST:
-               ret = ip_vs_del_dest(svc, udest);
+               ret = ip_vs_del_dest(svc, &udest);
                break;
        default:
                ret = -EINVAL;
@@ -2032,7 +2204,7 @@ static void
 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
 {
        spin_lock_bh(&src->lock);
-       memcpy(dst, src, (char*)&src->lock - (char*)src);
+       memcpy(dst, &src->ustats, sizeof(*dst));
        spin_unlock_bh(&src->lock);
 }
 
@@ -2040,7 +2212,7 @@ static void
 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
 {
        dst->protocol = src->protocol;
-       dst->addr = src->addr;
+       dst->addr = src->addr.ip;
        dst->port = src->port;
        dst->fwmark = src->fwmark;
        strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
@@ -2062,6 +2234,10 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
 
        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+                       /* Only expose IPv4 entries to old interface */
+                       if (svc->af != AF_INET)
+                               continue;
+
                        if (count >= get->num_services)
                                goto out;
                        memset(&entry, 0, sizeof(entry));
@@ -2077,6 +2253,10 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
 
        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+                       /* Only expose IPv4 entries to old interface */
+                       if (svc->af != AF_INET)
+                               continue;
+
                        if (count >= get->num_services)
                                goto out;
                        memset(&entry, 0, sizeof(entry));
@@ -2098,13 +2278,15 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
                         struct ip_vs_get_dests __user *uptr)
 {
        struct ip_vs_service *svc;
+       union nf_inet_addr addr = { .ip = get->addr };
        int ret = 0;
 
        if (get->fwmark)
-               svc = __ip_vs_svc_fwm_get(get->fwmark);
+               svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
        else
-               svc = __ip_vs_service_get(get->protocol,
-                                         get->addr, get->port);
+               svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
+                                         get->port);
+
        if (svc) {
                int count = 0;
                struct ip_vs_dest *dest;
@@ -2114,7 +2296,7 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
                        if (count >= get->num_dests)
                                break;
 
-                       entry.addr = dest->addr;
+                       entry.addr = dest->addr.ip;
                        entry.port = dest->port;
                        entry.conn_flags = atomic_read(&dest->conn_flags);
                        entry.weight = atomic_read(&dest->weight);
@@ -2239,13 +2421,15 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
        {
                struct ip_vs_service_entry *entry;
                struct ip_vs_service *svc;
+               union nf_inet_addr addr;
 
                entry = (struct ip_vs_service_entry *)arg;
+               addr.ip = entry->addr;
                if (entry->fwmark)
-                       svc = __ip_vs_svc_fwm_get(entry->fwmark);
+                       svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
                else
-                       svc = __ip_vs_service_get(entry->protocol,
-                                                 entry->addr, entry->port);
+                       svc = __ip_vs_service_get(AF_INET, entry->protocol,
+                                                 &addr, entry->port);
                if (svc) {
                        ip_vs_copy_service(entry, svc);
                        if (copy_to_user(user, entry, sizeof(*entry)) != 0)
@@ -2396,16 +2580,16 @@ static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
 
        spin_lock_bh(&stats->lock);
 
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
-       NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
-       NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
+       NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
+       NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
 
        spin_unlock_bh(&stats->lock);
 
@@ -2430,7 +2614,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
        if (!nl_service)
                return -EMSGSIZE;
 
-       NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
+       NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
 
        if (svc->fwmark) {
                NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
@@ -2516,7 +2700,7 @@ nla_put_failure:
        return skb->len;
 }
 
-static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
+static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
                                    struct nlattr *nla, int full_entry)
 {
        struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
@@ -2536,8 +2720,12 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
        if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
                return -EINVAL;
 
-       /* For now, only support IPv4 */
-       if (nla_get_u16(nla_af) != AF_INET)
+       usvc->af = nla_get_u16(nla_af);
+#ifdef CONFIG_IP_VS_IPV6
+       if (usvc->af != AF_INET && usvc->af != AF_INET6)
+#else
+       if (usvc->af != AF_INET)
+#endif
                return -EAFNOSUPPORT;
 
        if (nla_fwmark) {
@@ -2569,10 +2757,10 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
 
                /* prefill flags from service if it already exists */
                if (usvc->fwmark)
-                       svc = __ip_vs_svc_fwm_get(usvc->fwmark);
+                       svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
                else
-                       svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
-                                                 usvc->port);
+                       svc = __ip_vs_service_get(usvc->af, usvc->protocol,
+                                                 &usvc->addr, usvc->port);
                if (svc) {
                        usvc->flags = svc->flags;
                        ip_vs_service_put(svc);
@@ -2582,9 +2770,7 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
                /* set new flags from userland */
                usvc->flags = (usvc->flags & ~flags.mask) |
                              (flags.flags & flags.mask);
-
-               strlcpy(usvc->sched_name, nla_data(nla_sched),
-                       sizeof(usvc->sched_name));
+               usvc->sched_name = nla_data(nla_sched);
                usvc->timeout = nla_get_u32(nla_timeout);
                usvc->netmask = nla_get_u32(nla_netmask);
        }
@@ -2594,7 +2780,7 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
 
 static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
 {
-       struct ip_vs_service_user usvc;
+       struct ip_vs_service_user_kern usvc;
        int ret;
 
        ret = ip_vs_genl_parse_service(&usvc, nla, 0);
@@ -2602,10 +2788,10 @@ static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
                return ERR_PTR(ret);
 
        if (usvc.fwmark)
-               return __ip_vs_svc_fwm_get(usvc.fwmark);
+               return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
        else
-               return __ip_vs_service_get(usvc.protocol, usvc.addr,
-                                          usvc.port);
+               return __ip_vs_service_get(usvc.af, usvc.protocol,
+                                          &usvc.addr, usvc.port);
 }
 
 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
@@ -2704,7 +2890,7 @@ out_err:
        return skb->len;
 }
 
-static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
+static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
                                 struct nlattr *nla, int full_entry)
 {
        struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
@@ -2860,8 +3046,8 @@ static int ip_vs_genl_set_config(struct nlattr **attrs)
 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 {
        struct ip_vs_service *svc = NULL;
-       struct ip_vs_service_user usvc;
-       struct ip_vs_dest_user udest;
+       struct ip_vs_service_user_kern usvc;
+       struct ip_vs_dest_user_kern udest;
        int ret = 0, cmd;
        int need_full_svc = 0, need_full_dest = 0;
 
@@ -2913,9 +3099,10 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 
        /* Lookup the exact service by <protocol, addr, port> or fwmark */
        if (usvc.fwmark == 0)
-               svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
+               svc = __ip_vs_service_get(usvc.af, usvc.protocol,
+                                         &usvc.addr, usvc.port);
        else
-               svc = __ip_vs_svc_fwm_get(usvc.fwmark);
+               svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
 
        /* Unless we're adding a new service, the service must already exist */
        if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
index fa66824..a16943f 100644 (file)
@@ -218,7 +218,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
        IP_VS_DBG(6, "DH: destination IP address %u.%u.%u.%u "
                  "--> server %u.%u.%u.%u:%d\n",
                  NIPQUAD(iph->daddr),
-                 NIPQUAD(dest->addr),
+                 NIPQUAD(dest->addr.ip),
                  ntohs(dest->port));
 
        return dest;
@@ -234,6 +234,9 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler =
        .refcnt =               ATOMIC_INIT(0),
        .module =               THIS_MODULE,
        .n_list =               LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+       .supports_ipv6 =        0,
+#endif
        .init_service =         ip_vs_dh_init_svc,
        .done_service =         ip_vs_dh_done_svc,
        .update_service =       ip_vs_dh_update_svc,
index 4fb620e..2eb2860 100644 (file)
@@ -65,37 +65,37 @@ static void estimation_timer(unsigned long arg)
                s = container_of(e, struct ip_vs_stats, est);
 
                spin_lock(&s->lock);
-               n_conns = s->conns;
-               n_inpkts = s->inpkts;
-               n_outpkts = s->outpkts;
-               n_inbytes = s->inbytes;
-               n_outbytes = s->outbytes;
+               n_conns = s->ustats.conns;
+               n_inpkts = s->ustats.inpkts;
+               n_outpkts = s->ustats.outpkts;
+               n_inbytes = s->ustats.inbytes;
+               n_outbytes = s->ustats.outbytes;
 
                /* scaled by 2^10, but divided 2 seconds */
                rate = (n_conns - e->last_conns)<<9;
                e->last_conns = n_conns;
                e->cps += ((long)rate - (long)e->cps)>>2;
-               s->cps = (e->cps+0x1FF)>>10;
+               s->ustats.cps = (e->cps+0x1FF)>>10;
 
                rate = (n_inpkts - e->last_inpkts)<<9;
                e->last_inpkts = n_inpkts;
                e->inpps += ((long)rate - (long)e->inpps)>>2;
-               s->inpps = (e->inpps+0x1FF)>>10;
+               s->ustats.inpps = (e->inpps+0x1FF)>>10;
 
                rate = (n_outpkts - e->last_outpkts)<<9;
                e->last_outpkts = n_outpkts;
                e->outpps += ((long)rate - (long)e->outpps)>>2;
-               s->outpps = (e->outpps+0x1FF)>>10;
+               s->ustats.outpps = (e->outpps+0x1FF)>>10;
 
                rate = (n_inbytes - e->last_inbytes)<<4;
                e->last_inbytes = n_inbytes;
                e->inbps += ((long)rate - (long)e->inbps)>>2;
-               s->inbps = (e->inbps+0xF)>>5;
+               s->ustats.inbps = (e->inbps+0xF)>>5;
 
                rate = (n_outbytes - e->last_outbytes)<<4;
                e->last_outbytes = n_outbytes;
                e->outbps += ((long)rate - (long)e->outbps)>>2;
-               s->outbps = (e->outbps+0xF)>>5;
+               s->ustats.outbps = (e->outbps+0xF)>>5;
                spin_unlock(&s->lock);
        }
        spin_unlock(&est_lock);
@@ -108,20 +108,20 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats)
 
        INIT_LIST_HEAD(&est->list);
 
-       est->last_conns = stats->conns;
-       est->cps = stats->cps<<10;
+       est->last_conns = stats->ustats.conns;
+       est->cps = stats->ustats.cps<<10;
 
-       est->last_inpkts = stats->inpkts;
-       est->inpps = stats->inpps<<10;
+       est->last_inpkts = stats->ustats.inpkts;
+       est->inpps = stats->ustats.inpps<<10;
 
-       est->last_outpkts = stats->outpkts;
-       est->outpps = stats->outpps<<10;
+       est->last_outpkts = stats->ustats.outpkts;
+       est->outpps = stats->ustats.outpps<<10;
 
-       est->last_inbytes = stats->inbytes;
-       est->inbps = stats->inbps<<5;
+       est->last_inbytes = stats->ustats.inbytes;
+       est->inbps = stats->ustats.inbps<<5;
 
-       est->last_outbytes = stats->outbytes;
-       est->outbps = stats->outbps<<5;
+       est->last_outbytes = stats->ustats.outbytes;
+       est->outbps = stats->ustats.outbps<<5;
 
        spin_lock_bh(&est_lock);
        list_add(&est->list, &est_list);
index c1c758e..2e7dbd8 100644 (file)
@@ -140,13 +140,21 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
        struct tcphdr *th;
        char *data, *data_limit;
        char *start, *end;
-       __be32 from;
+       union nf_inet_addr from;
        __be16 port;
        struct ip_vs_conn *n_cp;
        char buf[24];           /* xxx.xxx.xxx.xxx,ppp,ppp\000 */
        unsigned buf_len;
        int ret;
 
+#ifdef CONFIG_IP_VS_IPV6
+       /* This application helper doesn't work with IPv6 yet,
+        * so turn this into a no-op for IPv6 packets
+        */
+       if (cp->af == AF_INET6)
+               return 1;
+#endif
+
        *diff = 0;
 
        /* Only useful for established sessions */
@@ -166,24 +174,25 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
                if (ip_vs_ftp_get_addrport(data, data_limit,
                                           SERVER_STRING,
                                           sizeof(SERVER_STRING)-1, ')',
-                                          &from, &port,
+                                          &from.ip, &port,
                                           &start, &end) != 1)
                        return 1;
 
                IP_VS_DBG(7, "PASV response (%u.%u.%u.%u:%d) -> "
                          "%u.%u.%u.%u:%d detected\n",
-                         NIPQUAD(from), ntohs(port), NIPQUAD(cp->caddr), 0);
+                         NIPQUAD(from.ip), ntohs(port),
+                         NIPQUAD(cp->caddr.ip), 0);
 
                /*
                 * Now update or create an connection entry for it
                 */
-               n_cp = ip_vs_conn_out_get(iph->protocol, from, port,
-                                         cp->caddr, 0);
+               n_cp = ip_vs_conn_out_get(AF_INET, iph->protocol, &from, port,
+                                         &cp->caddr, 0);
                if (!n_cp) {
-                       n_cp = ip_vs_conn_new(IPPROTO_TCP,
-                                             cp->caddr, 0,
-                                             cp->vaddr, port,
-                                             from, port,
+                       n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP,
+                                             &cp->caddr, 0,
+                                             &cp->vaddr, port,
+                                             &from, port,
                                              IP_VS_CONN_F_NO_CPORT,
                                              cp->dest);
                        if (!n_cp)
@@ -196,9 +205,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
                /*
                 * Replace the old passive address with the new one
                 */
-               from = n_cp->vaddr;
+               from.ip = n_cp->vaddr.ip;
                port = n_cp->vport;
-               sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from),
+               sprintf(buf, "%d,%d,%d,%d,%d,%d", NIPQUAD(from.ip),
                        (ntohs(port)>>8)&255, ntohs(port)&255);
                buf_len = strlen(buf);
 
@@ -243,10 +252,18 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
        struct tcphdr *th;
        char *data, *data_start, *data_limit;
        char *start, *end;
-       __be32 to;
+       union nf_inet_addr to;
        __be16 port;
        struct ip_vs_conn *n_cp;
 
+#ifdef CONFIG_IP_VS_IPV6
+       /* This application helper doesn't work with IPv6 yet,
+        * so turn this into a no-op for IPv6 packets
+        */
+       if (cp->af == AF_INET6)
+               return 1;
+#endif
+
        /* no diff required for incoming packets */
        *diff = 0;
 
@@ -291,12 +308,12 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
         */
        if (ip_vs_ftp_get_addrport(data_start, data_limit,
                                   CLIENT_STRING, sizeof(CLIENT_STRING)-1,
-                                  '\r', &to, &port,
+                                  '\r', &to.ip, &port,
                                   &start, &end) != 1)
                return 1;
 
        IP_VS_DBG(7, "PORT %u.%u.%u.%u:%d detected\n",
-                 NIPQUAD(to), ntohs(port));
+                 NIPQUAD(to.ip), ntohs(port));
 
        /* Passive mode off */
        cp->app_data = NULL;
@@ -306,16 +323,16 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
         */
        IP_VS_DBG(7, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n",
                  ip_vs_proto_name(iph->protocol),
-                 NIPQUAD(to), ntohs(port), NIPQUAD(cp->vaddr), 0);
+                 NIPQUAD(to.ip), ntohs(port), NIPQUAD(cp->vaddr.ip), 0);
 
-       n_cp = ip_vs_conn_in_get(iph->protocol,
-                                to, port,
-                                cp->vaddr, htons(ntohs(cp->vport)-1));
+       n_cp = ip_vs_conn_in_get(AF_INET, iph->protocol,
+                                &to, port,
+                                &cp->vaddr, htons(ntohs(cp->vport)-1));
        if (!n_cp) {
-               n_cp = ip_vs_conn_new(IPPROTO_TCP,
-                                     to, port,
-                                     cp->vaddr, htons(ntohs(cp->vport)-1),
-                                     cp->daddr, htons(ntohs(cp->dport)-1),
+               n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP,
+                                     &to, port,
+                                     &cp->vaddr, htons(ntohs(cp->vport)-1),
+                                     &cp->daddr, htons(ntohs(cp->dport)-1),
                                      0,
                                      cp->dest);
                if (!n_cp)
index d2a43aa..6ecef35 100644 (file)
@@ -422,7 +422,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
 
        IP_VS_DBG(6, "LBLC: server %d.%d.%d.%d:%d "
                  "activeconns %d refcnt %d weight %d overhead %d\n",
-                 NIPQUAD(least->addr), ntohs(least->port),
+                 NIPQUAD(least->addr.ip), ntohs(least->port),
                  atomic_read(&least->activeconns),
                  atomic_read(&least->refcnt),
                  atomic_read(&least->weight), loh);
@@ -506,7 +506,7 @@ out:
        IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u "
                  "--> server %u.%u.%u.%u:%d\n",
                  NIPQUAD(iph->daddr),
-                 NIPQUAD(dest->addr),
+                 NIPQUAD(dest->addr.ip),
                  ntohs(dest->port));
 
        return dest;
@@ -522,6 +522,9 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
        .refcnt =               ATOMIC_INIT(0),
        .module =               THIS_MODULE,
        .n_list =               LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+       .supports_ipv6 =        0,
+#endif
        .init_service =         ip_vs_lblc_init_svc,
        .done_service =         ip_vs_lblc_done_svc,
        .schedule =             ip_vs_lblc_schedule,
index 375a1ff..1f75ea8 100644 (file)
@@ -204,7 +204,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
 
        IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d "
                  "activeconns %d refcnt %d weight %d overhead %d\n",
-                 NIPQUAD(least->addr), ntohs(least->port),
+                 NIPQUAD(least->addr.ip), ntohs(least->port),
                  atomic_read(&least->activeconns),
                  atomic_read(&least->refcnt),
                  atomic_read(&least->weight), loh);
@@ -250,7 +250,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
 
        IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d "
                  "activeconns %d refcnt %d weight %d overhead %d\n",
-                 NIPQUAD(most->addr), ntohs(most->port),
+                 NIPQUAD(most->addr.ip), ntohs(most->port),
                  atomic_read(&most->activeconns),
                  atomic_read(&most->refcnt),
                  atomic_read(&most->weight), moh);
@@ -598,7 +598,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph)
 
        IP_VS_DBG(6, "LBLCR: server %d.%d.%d.%d:%d "
                  "activeconns %d refcnt %d weight %d overhead %d\n",
-                 NIPQUAD(least->addr), ntohs(least->port),
+                 NIPQUAD(least->addr.ip), ntohs(least->port),
                  atomic_read(&least->activeconns),
                  atomic_read(&least->refcnt),
                  atomic_read(&least->weight), loh);
@@ -706,7 +706,7 @@ out:
        IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u "
                  "--> server %u.%u.%u.%u:%d\n",
                  NIPQUAD(iph->daddr),
-                 NIPQUAD(dest->addr),
+                 NIPQUAD(dest->addr.ip),
                  ntohs(dest->port));
 
        return dest;
@@ -722,6 +722,9 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
        .refcnt =               ATOMIC_INIT(0),
        .module =               THIS_MODULE,
        .n_list =               LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+       .supports_ipv6 =        0,
+#endif
        .init_service =         ip_vs_lblcr_init_svc,
        .done_service =         ip_vs_lblcr_done_svc,
        .schedule =             ip_vs_lblcr_schedule,
index 2c3de1b..b69f808 100644 (file)
@@ -67,10 +67,10 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
        }
 
        if (least)
-       IP_VS_DBG(6, "LC: server %u.%u.%u.%u:%u activeconns %d inactconns %d\n",
-                 NIPQUAD(least->addr), ntohs(least->port),
-                 atomic_read(&least->activeconns),
-                 atomic_read(&least->inactconns));
+       IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d inactconns %d\n",
+                     IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+                     atomic_read(&least->activeconns),
+                     atomic_read(&least->inactconns));
 
        return least;
 }
@@ -81,6 +81,9 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = {
        .refcnt =               ATOMIC_INIT(0),
        .module =               THIS_MODULE,
        .n_list =               LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+       .supports_ipv6 =        1,
+#endif
        .schedule =             ip_vs_lc_schedule,
 };
 
index 5330d5a..9a2d803 100644 (file)
@@ -99,12 +99,12 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
                return NULL;
 
   out:
-       IP_VS_DBG(6, "NQ: server %u.%u.%u.%u:%u "
-                 "activeconns %d refcnt %d weight %d overhead %d\n",
-                 NIPQUAD(least->addr), ntohs(least->port),
-                 atomic_read(&least->activeconns),
-                 atomic_read(&least->refcnt),
-                 atomic_read(&least->weight), loh);
+       IP_VS_DBG_BUF(6, "NQ: server %s:%u "
+                     "activeconns %d refcnt %d weight %d overhead %d\n",
+                     IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+                     atomic_read(&least->activeconns),
+                     atomic_read(&least->refcnt),
+                     atomic_read(&least->weight), loh);
 
        return least;
 }
@@ -116,6 +116,9 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler =
        .refcnt =               ATOMIC_INIT(0),
        .module =               THIS_MODULE,
        .n_list =               LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+       .supports_ipv6 =        1,
+#endif
        .schedule =             ip_vs_nq_schedule,
 };
 
index 6099a88..b06da1c 100644 (file)
@@ -151,11 +151,11 @@ const char * ip_vs_state_name(__u16 proto, int state)
 }
 
 
-void
-ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
-                         const struct sk_buff *skb,
-                         int offset,
-                         const char *msg)
+static void
+ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp,
+                            const struct sk_buff *skb,
+                            int offset,
+                            const char *msg)
 {
        char buf[128];
        struct iphdr _iph, *ih;
@@ -189,6 +189,61 @@ ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
        printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+static void
+ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
+                            const struct sk_buff *skb,
+                            int offset,
+                            const char *msg)
+{
+       char buf[192];
+       struct ipv6hdr _iph, *ih;
+
+       ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+       if (ih == NULL)
+               sprintf(buf, "%s TRUNCATED", pp->name);
+       else if (ih->nexthdr == IPPROTO_FRAGMENT)
+               sprintf(buf, "%s " NIP6_FMT "->" NIP6_FMT " frag",
+                       pp->name, NIP6(ih->saddr),
+                       NIP6(ih->daddr));
+       else {
+               __be16 _ports[2], *pptr;
+
+               pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr),
+                                         sizeof(_ports), _ports);
+               if (pptr == NULL)
+                       sprintf(buf, "%s TRUNCATED " NIP6_FMT "->" NIP6_FMT,
+                               pp->name,
+                               NIP6(ih->saddr),
+                               NIP6(ih->daddr));
+               else
+                       sprintf(buf, "%s " NIP6_FMT ":%u->" NIP6_FMT ":%u",
+                               pp->name,
+                               NIP6(ih->saddr),
+                               ntohs(pptr[0]),
+                               NIP6(ih->daddr),
+                               ntohs(pptr[1]));
+       }
+
+       printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+#endif
+
+
+void
+ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
+                         const struct sk_buff *skb,
+                         int offset,
+                         const char *msg)
+{
+#ifdef CONFIG_IP_VS_IPV6
+       if (skb->protocol == __constant_htons(ETH_P_IPV6))
+               ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg);
+       else
+#endif
+               ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);
+}
+
 
 int __init ip_vs_protocol_init(void)
 {
index 3f9ebd7..2b18a78 100644 (file)
@@ -39,25 +39,23 @@ struct isakmp_hdr {
 
 
 static struct ip_vs_conn *
-ah_esp_conn_in_get(const struct sk_buff *skb,
-                  struct ip_vs_protocol *pp,
-                  const struct iphdr *iph,
-                  unsigned int proto_off,
+ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+                  const struct ip_vs_iphdr *iph, unsigned int proto_off,
                   int inverse)
 {
        struct ip_vs_conn *cp;
 
        if (likely(!inverse)) {
-               cp = ip_vs_conn_in_get(IPPROTO_UDP,
-                                      iph->saddr,
+               cp = ip_vs_conn_in_get(af, IPPROTO_UDP,
+                                      &iph->saddr,
                                       htons(PORT_ISAKMP),
-                                      iph->daddr,
+                                      &iph->daddr,
                                       htons(PORT_ISAKMP));
        } else {
-               cp = ip_vs_conn_in_get(IPPROTO_UDP,
-                                      iph->daddr,
+               cp = ip_vs_conn_in_get(af, IPPROTO_UDP,
+                                      &iph->daddr,
                                       htons(PORT_ISAKMP),
-                                      iph->saddr,
+                                      &iph->saddr,
                                       htons(PORT_ISAKMP));
        }
 
@@ -66,12 +64,12 @@ ah_esp_conn_in_get(const struct sk_buff *skb,
                 * We are not sure if the packet is from our
                 * service, so our conn_schedule hook should return NF_ACCEPT
                 */
-               IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet "
-                         "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n",
-                         inverse ? "ICMP+" : "",
-                         pp->name,
-                         NIPQUAD(iph->saddr),
-                         NIPQUAD(iph->daddr));
+               IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
+                             "%s%s %s->%s\n",
+                             inverse ? "ICMP+" : "",
+                             pp->name,
+                             IP_VS_DBG_ADDR(af, &iph->saddr),
+                             IP_VS_DBG_ADDR(af, &iph->daddr));
        }
 
        return cp;
@@ -79,32 +77,35 @@ ah_esp_conn_in_get(const struct sk_buff *skb,
 
 
 static struct ip_vs_conn *
-ah_esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
-                   const struct iphdr *iph, unsigned int proto_off, int inverse)
+ah_esp_conn_out_get(int af, const struct sk_buff *skb,
+                   struct ip_vs_protocol *pp,
+                   const struct ip_vs_iphdr *iph,
+                   unsigned int proto_off,
+                   int inverse)
 {
        struct ip_vs_conn *cp;
 
        if (likely(!inverse)) {
-               cp = ip_vs_conn_out_get(IPPROTO_UDP,
-                                       iph->saddr,
+               cp = ip_vs_conn_out_get(af, IPPROTO_UDP,
+                                       &iph->saddr,
                                        htons(PORT_ISAKMP),
-                                       iph->daddr,
+                                       &iph->daddr,
                                        htons(PORT_ISAKMP));
        } else {
-               cp = ip_vs_conn_out_get(IPPROTO_UDP,
-                                       iph->daddr,
+               cp = ip_vs_conn_out_get(af, IPPROTO_UDP,
+                                       &iph->daddr,
                                        htons(PORT_ISAKMP),
-                                       iph->saddr,
+                                       &iph->saddr,
                                        htons(PORT_ISAKMP));
        }
 
        if (!cp) {
-               IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet "
-                         "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n",
-                         inverse ? "ICMP+" : "",
-                         pp->name,
-                         NIPQUAD(iph->saddr),
-                         NIPQUAD(iph->daddr));
+               IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
+                             "%s%s %s->%s\n",
+                             inverse ? "ICMP+" : "",
+                             pp->name,
+                             IP_VS_DBG_ADDR(af, &iph->saddr),
+                             IP_VS_DBG_ADDR(af, &iph->daddr));
        }
 
        return cp;
@@ -112,8 +113,7 @@ ah_esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
 
 
 static int
-ah_esp_conn_schedule(struct sk_buff *skb,
-                    struct ip_vs_protocol *pp,
+ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
                     int *verdict, struct ip_vs_conn **cpp)
 {
        /*
@@ -125,8 +125,8 @@ ah_esp_conn_schedule(struct sk_buff *skb,
 
 
 static void
-ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
-                   int offset, const char *msg)
+ah_esp_debug_packet_v4(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+                      int offset, const char *msg)
 {
        char buf[256];
        struct iphdr _iph, *ih;
@@ -142,6 +142,38 @@ ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
        printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+static void
+ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+                      int offset, const char *msg)
+{
+       char buf[256];
+       struct ipv6hdr _iph, *ih;
+
+       ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+       if (ih == NULL)
+               sprintf(buf, "%s TRUNCATED", pp->name);
+       else
+               sprintf(buf, "%s " NIP6_FMT "->" NIP6_FMT,
+                       pp->name, NIP6(ih->saddr),
+                       NIP6(ih->daddr));
+
+       printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+#endif
+
+static void
+ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+                   int offset, const char *msg)
+{
+#ifdef CONFIG_IP_VS_IPV6
+       if (skb->protocol == __constant_htons(ETH_P_IPV6))
+               ah_esp_debug_packet_v6(pp, skb, offset, msg);
+       else
+#endif
+               ah_esp_debug_packet_v4(pp, skb, offset, msg);
+}
+
 
 static void ah_esp_init(struct ip_vs_protocol *pp)
 {
index d0ea467..537f616 100644 (file)
@@ -25,8 +25,9 @@
 
 
 static struct ip_vs_conn *
-tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
-               const struct iphdr *iph, unsigned int proto_off, int inverse)
+tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+               const struct ip_vs_iphdr *iph, unsigned int proto_off,
+               int inverse)
 {
        __be16 _ports[2], *pptr;
 
@@ -35,19 +36,20 @@ tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
                return NULL;
 
        if (likely(!inverse)) {
-               return ip_vs_conn_in_get(iph->protocol,
-                                        iph->saddr, pptr[0],
-                                        iph->daddr, pptr[1]);
+               return ip_vs_conn_in_get(af, iph->protocol,
+                                        &iph->saddr, pptr[0],
+                                        &iph->daddr, pptr[1]);
        } else {
-               return ip_vs_conn_in_get(iph->protocol,
-                                        iph->daddr, pptr[1],
-                                        iph->saddr, pptr[0]);
+               return ip_vs_conn_in_get(af, iph->protocol,
+                                        &iph->daddr, pptr[1],
+                                        &iph->saddr, pptr[0]);
        }
 }
 
 static struct ip_vs_conn *
-tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
-                const struct iphdr *iph, unsigned int proto_off, int inverse)
+tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+                const struct ip_vs_iphdr *iph, unsigned int proto_off,
+                int inverse)
 {
        __be16 _ports[2], *pptr;
 
@@ -56,34 +58,36 @@ tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
                return NULL;
 
        if (likely(!inverse)) {
-               return ip_vs_conn_out_get(iph->protocol,
-                                         iph->saddr, pptr[0],
-                                         iph->daddr, pptr[1]);
+               return ip_vs_conn_out_get(af, iph->protocol,
+                                         &iph->saddr, pptr[0],
+                                         &iph->daddr, pptr[1]);
        } else {
-               return ip_vs_conn_out_get(iph->protocol,
-                                         iph->daddr, pptr[1],
-                                         iph->saddr, pptr[0]);
+               return ip_vs_conn_out_get(af, iph->protocol,
+                                         &iph->daddr, pptr[1],
+                                         &iph->saddr, pptr[0]);
        }
 }
 
 
 static int
-tcp_conn_schedule(struct sk_buff *skb,
-                 struct ip_vs_protocol *pp,
+tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
                  int *verdict, struct ip_vs_conn **cpp)
 {
        struct ip_vs_service *svc;
        struct tcphdr _tcph, *th;
+       struct ip_vs_iphdr iph;
 
-       th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
+       ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+
+       th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph);
        if (th == NULL) {
                *verdict = NF_DROP;
                return 0;
        }
 
        if (th->syn &&
-           (svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
-                                    ip_hdr(skb)->daddr, th->dest))) {
+           (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
+                                    th->dest))) {
                if (ip_vs_todrop()) {
                        /*
                         * It seems that we are very loaded.
@@ -110,22 +114,62 @@ tcp_conn_schedule(struct sk_buff *skb,
 
 
 static inline void
-tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip,
+tcp_fast_csum_update(int af, struct tcphdr *tcph,      /* af picks the v4/v6 branch */
+                    const union nf_inet_addr *oldip,   /* address being replaced */
+                    const union nf_inet_addr *newip,   /* replacement address */
                     __be16 oldport, __be16 newport)
 {
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6)     /* IPv6: apply the ip6 address diff, then the port diff */
+               tcph->check =
+                       csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+                                        ip_vs_check_diff2(oldport, newport,
+                                               ~csum_unfold(tcph->check))));
+       else    /* any other af takes the IPv4 (->ip) path below */
+#endif
        tcph->check =
-               csum_fold(ip_vs_check_diff4(oldip, newip,
+               csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
                                 ip_vs_check_diff2(oldport, newport,
                                                ~csum_unfold(tcph->check))));
 }
 
 
+static inline void
+tcp_partial_csum_update(int af, struct tcphdr *tcph,
+                    const union nf_inet_addr *oldip,
+                    const union nf_inet_addr *newip,
+                    __be16 oldlen, __be16 newlen)
+{
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6)
+               tcph->check =
+                       csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+                                        ip_vs_check_diff2(oldlen, newlen,
+                                               ~csum_unfold(tcph->check))));
+       else
+#endif
+       tcph->check =
+               csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
+                               ip_vs_check_diff2(oldlen, newlen,
+                                               ~csum_unfold(tcph->check))));
+}
+
+
 static int
 tcp_snat_handler(struct sk_buff *skb,
                 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
        struct tcphdr *tcph;
-       const unsigned int tcphoff = ip_hdrlen(skb);
+       unsigned int tcphoff;
+       int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+       if (cp->af == AF_INET6)
+               tcphoff = sizeof(struct ipv6hdr);
+       else
+#endif
+               tcphoff = ip_hdrlen(skb);
+       oldlen = skb->len - tcphoff;
 
        /* csum_check requires unshared skb */
        if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
@@ -133,7 +177,7 @@ tcp_snat_handler(struct sk_buff *skb,
 
        if (unlikely(cp->app != NULL)) {
                /* Some checks before mangling */
-               if (pp->csum_check && !pp->csum_check(skb, pp))
+               if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
                        return 0;
 
                /* Call application helper if needed */
@@ -141,13 +185,17 @@ tcp_snat_handler(struct sk_buff *skb,
                        return 0;
        }
 
-       tcph = (void *)ip_hdr(skb) + tcphoff;
+       tcph = (void *)skb_network_header(skb) + tcphoff;
        tcph->source = cp->vport;
 
        /* Adjust TCP checksums */
-       if (!cp->app) {
+       if (skb->ip_summed == CHECKSUM_PARTIAL) {
+               tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
+                                       htonl(oldlen),
+                                       htonl(skb->len - tcphoff));
+       } else if (!cp->app) {
                /* Only port and addr are changed, do fast csum update */
-               tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
+               tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
                                     cp->dport, cp->vport);
                if (skb->ip_summed == CHECKSUM_COMPLETE)
                        skb->ip_summed = CHECKSUM_NONE;
@@ -155,9 +203,20 @@ tcp_snat_handler(struct sk_buff *skb,
                /* full checksum calculation */
                tcph->check = 0;
                skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
-               tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
-                                               skb->len - tcphoff,
-                                               cp->protocol, skb->csum);
+#ifdef CONFIG_IP_VS_IPV6
+               if (cp->af == AF_INET6)
+                       tcph->check = csum_ipv6_magic(&cp->vaddr.in6,
+                                                     &cp->caddr.in6,
+                                                     skb->len - tcphoff,
+                                                     cp->protocol, skb->csum);
+               else
+#endif
+                       tcph->check = csum_tcpudp_magic(cp->vaddr.ip,
+                                                       cp->caddr.ip,
+                                                       skb->len - tcphoff,
+                                                       cp->protocol,
+                                                       skb->csum);
+
                IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
                          pp->name, tcph->check,
                          (char*)&(tcph->check) - (char*)tcph);
@@ -171,7 +230,16 @@ tcp_dnat_handler(struct sk_buff *skb,
                 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
        struct tcphdr *tcph;
-       const unsigned int tcphoff = ip_hdrlen(skb);
+       unsigned int tcphoff;
+       int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+       if (cp->af == AF_INET6)
+               tcphoff = sizeof(struct ipv6hdr);
+       else
+#endif
+               tcphoff = ip_hdrlen(skb);
+       oldlen = skb->len - tcphoff;
 
        /* csum_check requires unshared skb */
        if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
@@ -179,7 +247,7 @@ tcp_dnat_handler(struct sk_buff *skb,
 
        if (unlikely(cp->app != NULL)) {
                /* Some checks before mangling */
-               if (pp->csum_check && !pp->csum_check(skb, pp))
+               if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
                        return 0;
 
                /*
@@ -190,15 +258,19 @@ tcp_dnat_handler(struct sk_buff *skb,
                        return 0;
        }
 
-       tcph = (void *)ip_hdr(skb) + tcphoff;
+       tcph = (void *)skb_network_header(skb) + tcphoff;
        tcph->dest = cp->dport;
 
        /*
         *      Adjust TCP checksums
         */
-       if (!cp->app) {
+       if (skb->ip_summed == CHECKSUM_PARTIAL) {
+               tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
+                                       htonl(oldlen),
+                                       htonl(skb->len - tcphoff));
+       } else if (!cp->app) {
                /* Only port and addr are changed, do fast csum update */
-               tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
+               tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
                                     cp->vport, cp->dport);
                if (skb->ip_summed == CHECKSUM_COMPLETE)
                        skb->ip_summed = CHECKSUM_NONE;
@@ -206,9 +278,19 @@ tcp_dnat_handler(struct sk_buff *skb,
                /* full checksum calculation */
                tcph->check = 0;
                skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
-               tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
-                                               skb->len - tcphoff,
-                                               cp->protocol, skb->csum);
+#ifdef CONFIG_IP_VS_IPV6
+               if (cp->af == AF_INET6)
+                       tcph->check = csum_ipv6_magic(&cp->caddr.in6,
+                                                     &cp->daddr.in6,
+                                                     skb->len - tcphoff,
+                                                     cp->protocol, skb->csum);
+               else
+#endif
+                       tcph->check = csum_tcpudp_magic(cp->caddr.ip,
+                                                       cp->daddr.ip,
+                                                       skb->len - tcphoff,
+                                                       cp->protocol,
+                                                       skb->csum);
                skb->ip_summed = CHECKSUM_UNNECESSARY;
        }
        return 1;
@@ -216,21 +298,43 @@ tcp_dnat_handler(struct sk_buff *skb,
 
 
 static int
-tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
+tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
 {
-       const unsigned int tcphoff = ip_hdrlen(skb);
+       unsigned int tcphoff;
+
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6)
+               tcphoff = sizeof(struct ipv6hdr);
+       else
+#endif
+               tcphoff = ip_hdrlen(skb);
 
        switch (skb->ip_summed) {
        case CHECKSUM_NONE:
                skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
        case CHECKSUM_COMPLETE:
-               if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
-                                     skb->len - tcphoff,
-                                     ip_hdr(skb)->protocol, skb->csum)) {
-                       IP_VS_DBG_RL_PKT(0, pp, skb, 0,
-                                        "Failed checksum for");
-                       return 0;
-               }
+#ifdef CONFIG_IP_VS_IPV6
+               if (af == AF_INET6) {
+                       if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+                                           &ipv6_hdr(skb)->daddr,
+                                           skb->len - tcphoff,
+                                           ipv6_hdr(skb)->nexthdr,
+                                           skb->csum)) {
+                               IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+                                                "Failed checksum for");
+                               return 0;
+                       }
+               } else
+#endif
+                       if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
+                                             ip_hdr(skb)->daddr,
+                                             skb->len - tcphoff,
+                                             ip_hdr(skb)->protocol,
+                                             skb->csum)) {
+                               IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+                                                "Failed checksum for");
+                               return 0;
+                       }
                break;
        default:
                /* No need to checksum. */
@@ -419,19 +523,23 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
        if (new_state != cp->state) {
                struct ip_vs_dest *dest = cp->dest;
 
-               IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->"
-                         "%u.%u.%u.%u:%d state: %s->%s conn->refcnt:%d\n",
-                         pp->name,
-                         (state_off==TCP_DIR_OUTPUT)?"output ":"input ",
-                         th->syn? 'S' : '.',
-                         th->fin? 'F' : '.',
-                         th->ack? 'A' : '.',
-                         th->rst? 'R' : '.',
-                         NIPQUAD(cp->daddr), ntohs(cp->dport),
-                         NIPQUAD(cp->caddr), ntohs(cp->cport),
-                         tcp_state_name(cp->state),
-                         tcp_state_name(new_state),
-                         atomic_read(&cp->refcnt));
+               IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
+                             "%s:%d state: %s->%s conn->refcnt:%d\n",
+                             pp->name,
+                             ((state_off == TCP_DIR_OUTPUT) ?
+                              "output " : "input "),
+                             th->syn ? 'S' : '.',
+                             th->fin ? 'F' : '.',
+                             th->ack ? 'A' : '.',
+                             th->rst ? 'R' : '.',
+                             IP_VS_DBG_ADDR(cp->af, &cp->daddr),
+                             ntohs(cp->dport),
+                             IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+                             ntohs(cp->cport),
+                             tcp_state_name(cp->state),
+                             tcp_state_name(new_state),
+                             atomic_read(&cp->refcnt));
+
                if (dest) {
                        if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
                            (new_state != IP_VS_TCP_S_ESTABLISHED)) {
@@ -461,7 +569,13 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
 {
        struct tcphdr _tcph, *th;
 
-       th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
+#ifdef CONFIG_IP_VS_IPV6
+       int ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
+#else
+       int ihl = ip_hdrlen(skb);
+#endif
+
+       th = skb_header_pointer(skb, ihl, sizeof(_tcph), &_tcph);
        if (th == NULL)
                return 0;
 
@@ -546,12 +660,15 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
                                break;
                        spin_unlock(&tcp_app_lock);
 
-                       IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
-                                 "%u.%u.%u.%u:%u to app %s on port %u\n",
-                                 __func__,
-                                 NIPQUAD(cp->caddr), ntohs(cp->cport),
-                                 NIPQUAD(cp->vaddr), ntohs(cp->vport),
-                                 inc->name, ntohs(inc->port));
+                       IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
+                                     "%s:%u to app %s on port %u\n",
+                                     __func__,
+                                     IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+                                     ntohs(cp->cport),
+                                     IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+                                     ntohs(cp->vport),
+                                     inc->name, ntohs(inc->port));
+
                        cp->app = inc;
                        if (inc->init_conn)
                                result = inc->init_conn(inc, cp);
index c6be5d5..e3ee26b 100644 (file)
@@ -24,8 +24,9 @@
 #include <net/ip.h>
 
 static struct ip_vs_conn *
-udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
-               const struct iphdr *iph, unsigned int proto_off, int inverse)
+udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+               const struct ip_vs_iphdr *iph, unsigned int proto_off,
+               int inverse)
 {
        struct ip_vs_conn *cp;
        __be16 _ports[2], *pptr;
@@ -35,13 +36,13 @@ udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
                return NULL;
 
        if (likely(!inverse)) {
-               cp = ip_vs_conn_in_get(iph->protocol,
-                                      iph->saddr, pptr[0],
-                                      iph->daddr, pptr[1]);
+               cp = ip_vs_conn_in_get(af, iph->protocol,
+                                      &iph->saddr, pptr[0],
+                                      &iph->daddr, pptr[1]);
        } else {
-               cp = ip_vs_conn_in_get(iph->protocol,
-                                      iph->daddr, pptr[1],
-                                      iph->saddr, pptr[0]);
+               cp = ip_vs_conn_in_get(af, iph->protocol,
+                                      &iph->daddr, pptr[1],
+                                      &iph->saddr, pptr[0]);
        }
 
        return cp;
@@ -49,25 +50,25 @@ udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
 
 
 static struct ip_vs_conn *
-udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
-                const struct iphdr *iph, unsigned int proto_off, int inverse)
+udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+                const struct ip_vs_iphdr *iph, unsigned int proto_off,
+                int inverse)
 {
        struct ip_vs_conn *cp;
        __be16 _ports[2], *pptr;
 
-       pptr = skb_header_pointer(skb, ip_hdrlen(skb),
-                                 sizeof(_ports), _ports);
+       pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
        if (pptr == NULL)
                return NULL;
 
        if (likely(!inverse)) {
-               cp = ip_vs_conn_out_get(iph->protocol,
-                                       iph->saddr, pptr[0],
-                                       iph->daddr, pptr[1]);
+               cp = ip_vs_conn_out_get(af, iph->protocol,
+                                       &iph->saddr, pptr[0],
+                                       &iph->daddr, pptr[1]);
        } else {
-               cp = ip_vs_conn_out_get(iph->protocol,
-                                       iph->daddr, pptr[1],
-                                       iph->saddr, pptr[0]);
+               cp = ip_vs_conn_out_get(af, iph->protocol,
+                                       &iph->daddr, pptr[1],
+                                       &iph->saddr, pptr[0]);
        }
 
        return cp;
@@ -75,21 +76,24 @@ udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
 
 
 static int
-udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
+udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
                  int *verdict, struct ip_vs_conn **cpp)
 {
        struct ip_vs_service *svc;
        struct udphdr _udph, *uh;
+       struct ip_vs_iphdr iph;
+
+       ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 
-       uh = skb_header_pointer(skb, ip_hdrlen(skb),
-                               sizeof(_udph), &_udph);
+       uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
        if (uh == NULL) {
                *verdict = NF_DROP;
                return 0;
        }
 
-       if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
-                                    ip_hdr(skb)->daddr, uh->dest))) {
+       svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+                               &iph.daddr, uh->dest);
+       if (svc) {
                if (ip_vs_todrop()) {
                        /*
                         * It seems that we are very loaded.
@@ -116,23 +120,63 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
 
 
 static inline void
-udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip,
+udp_fast_csum_update(int af, struct udphdr *uhdr,      /* af picks the v4/v6 branch */
+                    const union nf_inet_addr *oldip,   /* address being replaced */
+                    const union nf_inet_addr *newip,   /* replacement address */
                     __be16 oldport, __be16 newport)
 {
-       uhdr->check =
-               csum_fold(ip_vs_check_diff4(oldip, newip,
-                                ip_vs_check_diff2(oldport, newport,
-                                       ~csum_unfold(uhdr->check))));
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6)     /* IPv6: apply the ip6 address diff, then the port diff */
+               uhdr->check =
+                       csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+                                        ip_vs_check_diff2(oldport, newport,
+                                               ~csum_unfold(uhdr->check))));
+       else    /* any other af takes the IPv4 (->ip) path */
+#endif
+               uhdr->check =
+                       csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
+                                        ip_vs_check_diff2(oldport, newport,
+                                               ~csum_unfold(uhdr->check))));
        if (!uhdr->check)
                uhdr->check = CSUM_MANGLED_0;
 }
 
+static inline void
+udp_partial_csum_update(int af, struct udphdr *uhdr,
+                    const union nf_inet_addr *oldip,
+                    const union nf_inet_addr *newip,
+                    __be16 oldlen, __be16 newlen)
+{
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6)
+               uhdr->check =
+                       csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+                                        ip_vs_check_diff2(oldlen, newlen,
+                                               ~csum_unfold(uhdr->check))));
+       else
+#endif
+       uhdr->check =
+               csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
+                               ip_vs_check_diff2(oldlen, newlen,
+                                               ~csum_unfold(uhdr->check))));
+}
+
+
 static int
 udp_snat_handler(struct sk_buff *skb,
                 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
        struct udphdr *udph;
-       const unsigned int udphoff = ip_hdrlen(skb);
+       unsigned int udphoff;
+       int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+       if (cp->af == AF_INET6)
+               udphoff = sizeof(struct ipv6hdr);
+       else
+#endif
+               udphoff = ip_hdrlen(skb);
+       oldlen = skb->len - udphoff;
 
        /* csum_check requires unshared skb */
        if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
@@ -140,7 +184,7 @@ udp_snat_handler(struct sk_buff *skb,
 
        if (unlikely(cp->app != NULL)) {
                /* Some checks before mangling */
-               if (pp->csum_check && !pp->csum_check(skb, pp))
+               if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
                        return 0;
 
                /*
@@ -150,15 +194,19 @@ udp_snat_handler(struct sk_buff *skb,
                        return 0;
        }
 
-       udph = (void *)ip_hdr(skb) + udphoff;
+       udph = (void *)skb_network_header(skb) + udphoff;
        udph->source = cp->vport;
 
        /*
         *      Adjust UDP checksums
         */
-       if (!cp->app && (udph->check != 0)) {
+       if (skb->ip_summed == CHECKSUM_PARTIAL) {
+               udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
+                                       htonl(oldlen),
+                                       htonl(skb->len - udphoff));
+       } else if (!cp->app && (udph->check != 0)) {
                /* Only port and addr are changed, do fast csum update */
-               udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
+               udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
                                     cp->dport, cp->vport);
                if (skb->ip_summed == CHECKSUM_COMPLETE)
                        skb->ip_summed = CHECKSUM_NONE;
@@ -166,9 +214,19 @@ udp_snat_handler(struct sk_buff *skb,
                /* full checksum calculation */
                udph->check = 0;
                skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
-               udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
-                                               skb->len - udphoff,
-                                               cp->protocol, skb->csum);
+#ifdef CONFIG_IP_VS_IPV6
+               if (cp->af == AF_INET6)
+                       udph->check = csum_ipv6_magic(&cp->vaddr.in6,
+                                                     &cp->caddr.in6,
+                                                     skb->len - udphoff,
+                                                     cp->protocol, skb->csum);
+               else
+#endif
+                       udph->check = csum_tcpudp_magic(cp->vaddr.ip,
+                                                       cp->caddr.ip,
+                                                       skb->len - udphoff,
+                                                       cp->protocol,
+                                                       skb->csum);
                if (udph->check == 0)
                        udph->check = CSUM_MANGLED_0;
                IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
@@ -184,7 +242,16 @@ udp_dnat_handler(struct sk_buff *skb,
                 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
        struct udphdr *udph;
-       unsigned int udphoff = ip_hdrlen(skb);
+       unsigned int udphoff;
+       int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+       if (cp->af == AF_INET6)
+               udphoff = sizeof(struct ipv6hdr);
+       else
+#endif
+               udphoff = ip_hdrlen(skb);
+       oldlen = skb->len - udphoff;
 
        /* csum_check requires unshared skb */
        if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
@@ -192,7 +259,7 @@ udp_dnat_handler(struct sk_buff *skb,
 
        if (unlikely(cp->app != NULL)) {
                /* Some checks before mangling */
-               if (pp->csum_check && !pp->csum_check(skb, pp))
+               if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
                        return 0;
 
                /*
@@ -203,15 +270,19 @@ udp_dnat_handler(struct sk_buff *skb,
                        return 0;
        }
 
-       udph = (void *)ip_hdr(skb) + udphoff;
+       udph = (void *)skb_network_header(skb) + udphoff;
        udph->dest = cp->dport;
 
        /*
         *      Adjust UDP checksums
         */
-       if (!cp->app && (udph->check != 0)) {
+       if (skb->ip_summed == CHECKSUM_PARTIAL) {
+               udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
+                                       htonl(oldlen),
+                                       htonl(skb->len - udphoff));
+       } else if (!cp->app && (udph->check != 0)) {
                /* Only port and addr are changed, do fast csum update */
-               udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
+               udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
                                     cp->vport, cp->dport);
                if (skb->ip_summed == CHECKSUM_COMPLETE)
                        skb->ip_summed = CHECKSUM_NONE;
@@ -219,9 +290,19 @@ udp_dnat_handler(struct sk_buff *skb,
                /* full checksum calculation */
                udph->check = 0;
                skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
-               udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
-                                               skb->len - udphoff,
-                                               cp->protocol, skb->csum);
+#ifdef CONFIG_IP_VS_IPV6
+               if (cp->af == AF_INET6)
+                       udph->check = csum_ipv6_magic(&cp->caddr.in6,
+                                                     &cp->daddr.in6,
+                                                     skb->len - udphoff,
+                                                     cp->protocol, skb->csum);
+               else
+#endif
+                       udph->check = csum_tcpudp_magic(cp->caddr.ip,
+                                                       cp->daddr.ip,
+                                                       skb->len - udphoff,
+                                                       cp->protocol,
+                                                       skb->csum);
                if (udph->check == 0)
                        udph->check = CSUM_MANGLED_0;
                skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -231,10 +312,17 @@ udp_dnat_handler(struct sk_buff *skb,
 
 
 static int
-udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
+udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
 {
        struct udphdr _udph, *uh;
-       const unsigned int udphoff = ip_hdrlen(skb);
+       unsigned int udphoff;
+
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6)
+               udphoff = sizeof(struct ipv6hdr);
+       else
+#endif
+               udphoff = ip_hdrlen(skb);
 
        uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
        if (uh == NULL)
@@ -246,15 +334,28 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
                        skb->csum = skb_checksum(skb, udphoff,
                                                 skb->len - udphoff, 0);
                case CHECKSUM_COMPLETE:
-                       if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
-                                             ip_hdr(skb)->daddr,
-                                             skb->len - udphoff,
-                                             ip_hdr(skb)->protocol,
-                                             skb->csum)) {
-                               IP_VS_DBG_RL_PKT(0, pp, skb, 0,
-                                                "Failed checksum for");
-                               return 0;
-                       }
+#ifdef CONFIG_IP_VS_IPV6
+                       if (af == AF_INET6) {
+                               if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+                                                   &ipv6_hdr(skb)->daddr,
+                                                   skb->len - udphoff,
+                                                   ipv6_hdr(skb)->nexthdr,
+                                                   skb->csum)) {
+                                       IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+                                                        "Failed checksum for");
+                                       return 0;
+                               }
+                       } else
+#endif
+                               if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
+                                                     ip_hdr(skb)->daddr,
+                                                     skb->len - udphoff,
+                                                     ip_hdr(skb)->protocol,
+                                                     skb->csum)) {
+                                       IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+                                                        "Failed checksum for");
+                                       return 0;
+                               }
                        break;
                default:
                        /* No need to checksum. */
@@ -340,12 +441,15 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
                                break;
                        spin_unlock(&udp_app_lock);
 
-                       IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
-                                 "%u.%u.%u.%u:%u to app %s on port %u\n",
-                                 __func__,
-                                 NIPQUAD(cp->caddr), ntohs(cp->cport),
-                                 NIPQUAD(cp->vaddr), ntohs(cp->vport),
-                                 inc->name, ntohs(inc->port));
+                       IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
+                                     "%s:%u to app %s on port %u\n",
+                                     __func__,
+                                     IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+                                     ntohs(cp->cport),
+                                     IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+                                     ntohs(cp->vport),
+                                     inc->name, ntohs(inc->port));
+
                        cp->app = inc;
                        if (inc->init_conn)
                                result = inc->init_conn(inc, cp);
index f749291..a22195f 100644 (file)
@@ -74,11 +74,11 @@ ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
   out:
        svc->sched_data = q;
        write_unlock(&svc->sched_lock);
-       IP_VS_DBG(6, "RR: server %u.%u.%u.%u:%u "
-                 "activeconns %d refcnt %d weight %d\n",
-                 NIPQUAD(dest->addr), ntohs(dest->port),
-                 atomic_read(&dest->activeconns),
-                 atomic_read(&dest->refcnt), atomic_read(&dest->weight));
+       IP_VS_DBG_BUF(6, "RR: server %s:%u "
+                     "activeconns %d refcnt %d weight %d\n",
+                     IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
+                     atomic_read(&dest->activeconns),
+                     atomic_read(&dest->refcnt), atomic_read(&dest->weight));
 
        return dest;
 }
@@ -89,6 +89,9 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = {
        .refcnt =               ATOMIC_INIT(0),
        .module =               THIS_MODULE,
        .n_list =               LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+       .supports_ipv6 =        1,
+#endif
        .init_service =         ip_vs_rr_init_svc,
        .update_service =       ip_vs_rr_update_svc,
        .schedule =             ip_vs_rr_schedule,
index 53f73be..7d2f22f 100644 (file)
@@ -101,12 +101,12 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
                }
        }
 
-       IP_VS_DBG(6, "SED: server %u.%u.%u.%u:%u "
-                 "activeconns %d refcnt %d weight %d overhead %d\n",
-                 NIPQUAD(least->addr), ntohs(least->port),
-                 atomic_read(&least->activeconns),
-                 atomic_read(&least->refcnt),
-                 atomic_read(&least->weight), loh);
+       IP_VS_DBG_BUF(6, "SED: server %s:%u "
+                     "activeconns %d refcnt %d weight %d overhead %d\n",
+                     IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+                     atomic_read(&least->activeconns),
+                     atomic_read(&least->refcnt),
+                     atomic_read(&least->weight), loh);
 
        return least;
 }
@@ -118,6 +118,9 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler =
        .refcnt =               ATOMIC_INIT(0),
        .module =               THIS_MODULE,
        .n_list =               LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+       .supports_ipv6 =        1,
+#endif
        .schedule =             ip_vs_sed_schedule,
 };
 
index 7b979e2..1d96de2 100644 (file)
@@ -215,7 +215,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
        IP_VS_DBG(6, "SH: source IP address %u.%u.%u.%u "
                  "--> server %u.%u.%u.%u:%d\n",
                  NIPQUAD(iph->saddr),
-                 NIPQUAD(dest->addr),
+                 NIPQUAD(dest->addr.ip),
                  ntohs(dest->port));
 
        return dest;
@@ -231,6 +231,9 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler =
        .refcnt =               ATOMIC_INIT(0),
        .module =               THIS_MODULE,
        .n_list  =              LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+       .supports_ipv6 =        0,
+#endif
        .init_service =         ip_vs_sh_init_svc,
        .done_service =         ip_vs_sh_done_svc,
        .update_service =       ip_vs_sh_update_svc,
index a652da2..28237a5 100644 (file)
@@ -256,9 +256,9 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
        s->cport = cp->cport;
        s->vport = cp->vport;
        s->dport = cp->dport;
-       s->caddr = cp->caddr;
-       s->vaddr = cp->vaddr;
-       s->daddr = cp->daddr;
+       s->caddr = cp->caddr.ip;
+       s->vaddr = cp->vaddr.ip;
+       s->daddr = cp->daddr.ip;
        s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
        s->state = htons(cp->state);
        if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
@@ -366,21 +366,28 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
                }
 
                if (!(flags & IP_VS_CONN_F_TEMPLATE))
-                       cp = ip_vs_conn_in_get(s->protocol,
-                                              s->caddr, s->cport,
-                                              s->vaddr, s->vport);
+                       cp = ip_vs_conn_in_get(AF_INET, s->protocol,
+                                              (union nf_inet_addr *)&s->caddr,
+                                              s->cport,
+                                              (union nf_inet_addr *)&s->vaddr,
+                                              s->vport);
                else
-                       cp = ip_vs_ct_in_get(s->protocol,
-                                              s->caddr, s->cport,
-                                              s->vaddr, s->vport);
+                       cp = ip_vs_ct_in_get(AF_INET, s->protocol,
+                                            (union nf_inet_addr *)&s->caddr,
+                                            s->cport,
+                                            (union nf_inet_addr *)&s->vaddr,
+                                            s->vport);
                if (!cp) {
                        /*
                         * Find the appropriate destination for the connection.
                         * If it is not found the connection will remain unbound
                         * but still handled.
                         */
-                       dest = ip_vs_find_dest(s->daddr, s->dport,
-                                              s->vaddr, s->vport,
+                       dest = ip_vs_find_dest(AF_INET,
+                                              (union nf_inet_addr *)&s->daddr,
+                                              s->dport,
+                                              (union nf_inet_addr *)&s->vaddr,
+                                              s->vport,
                                               s->protocol);
                        /*  Set the approprite ativity flag */
                        if (s->protocol == IPPROTO_TCP) {
@@ -389,10 +396,13 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
                                else
                                        flags &= ~IP_VS_CONN_F_INACTIVE;
                        }
-                       cp = ip_vs_conn_new(s->protocol,
-                                           s->caddr, s->cport,
-                                           s->vaddr, s->vport,
-                                           s->daddr, s->dport,
+                       cp = ip_vs_conn_new(AF_INET, s->protocol,
+                                           (union nf_inet_addr *)&s->caddr,
+                                           s->cport,
+                                           (union nf_inet_addr *)&s->vaddr,
+                                           s->vport,
+                                           (union nf_inet_addr *)&s->daddr,
+                                           s->dport,
                                            flags, dest);
                        if (dest)
                                atomic_dec(&dest->refcnt);
index df7ad8d..8c596e7 100644 (file)
@@ -89,12 +89,12 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
                }
        }
 
-       IP_VS_DBG(6, "WLC: server %u.%u.%u.%u:%u "
-                 "activeconns %d refcnt %d weight %d overhead %d\n",
-                 NIPQUAD(least->addr), ntohs(least->port),
-                 atomic_read(&least->activeconns),
-                 atomic_read(&least->refcnt),
-                 atomic_read(&least->weight), loh);
+       IP_VS_DBG_BUF(6, "WLC: server %s:%u "
+                     "activeconns %d refcnt %d weight %d overhead %d\n",
+                     IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+                     atomic_read(&least->activeconns),
+                     atomic_read(&least->refcnt),
+                     atomic_read(&least->weight), loh);
 
        return least;
 }
@@ -106,6 +106,9 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler =
        .refcnt =               ATOMIC_INIT(0),
        .module =               THIS_MODULE,
        .n_list =               LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+       .supports_ipv6 =        1,
+#endif
        .schedule =             ip_vs_wlc_schedule,
 };
 
index 0d86a79..7ea92fe 100644 (file)
@@ -195,12 +195,12 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
                }
        }
 
-       IP_VS_DBG(6, "WRR: server %u.%u.%u.%u:%u "
-                 "activeconns %d refcnt %d weight %d\n",
-                 NIPQUAD(dest->addr), ntohs(dest->port),
-                 atomic_read(&dest->activeconns),
-                 atomic_read(&dest->refcnt),
-                 atomic_read(&dest->weight));
+       IP_VS_DBG_BUF(6, "WRR: server %s:%u "
+                     "activeconns %d refcnt %d weight %d\n",
+                     IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
+                     atomic_read(&dest->activeconns),
+                     atomic_read(&dest->refcnt),
+                     atomic_read(&dest->weight));
 
   out:
        write_unlock(&svc->sched_lock);
@@ -213,6 +213,9 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = {
        .refcnt =               ATOMIC_INIT(0),
        .module =               THIS_MODULE,
        .n_list =               LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+       .supports_ipv6 =        1,
+#endif
        .init_service =         ip_vs_wrr_init_svc,
        .done_service =         ip_vs_wrr_done_svc,
        .update_service =       ip_vs_wrr_update_svc,
index 9892d4a..02ddc2b 100644 (file)
@@ -20,6 +20,9 @@
 #include <net/udp.h>
 #include <net/icmp.h>                   /* for icmp_send */
 #include <net/route.h>                  /* for ip_route_output */
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <linux/icmpv6.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 
@@ -47,7 +50,8 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
 
        if (!dst)
                return NULL;
-       if ((dst->obsolete || rtos != dest->dst_rtos) &&
+       if ((dst->obsolete
+            || (dest->af == AF_INET && rtos != dest->dst_rtos)) &&
            dst->ops->check(dst, cookie) == NULL) {
                dest->dst_cache = NULL;
                dst_release(dst);
@@ -71,7 +75,7 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
                                .oif = 0,
                                .nl_u = {
                                        .ip4_u = {
-                                               .daddr = dest->addr,
+                                               .daddr = dest->addr.ip,
                                                .saddr = 0,
                                                .tos = rtos, } },
                        };
@@ -80,12 +84,12 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
                                spin_unlock(&dest->dst_lock);
                                IP_VS_DBG_RL("ip_route_output error, "
                                             "dest: %u.%u.%u.%u\n",
-                                            NIPQUAD(dest->addr));
+                                            NIPQUAD(dest->addr.ip));
                                return NULL;
                        }
                        __ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
                        IP_VS_DBG(10, "new dst %u.%u.%u.%u, refcnt=%d, rtos=%X\n",
-                                 NIPQUAD(dest->addr),
+                                 NIPQUAD(dest->addr.ip),
                                  atomic_read(&rt->u.dst.__refcnt), rtos);
                }
                spin_unlock(&dest->dst_lock);
@@ -94,14 +98,14 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
                        .oif = 0,
                        .nl_u = {
                                .ip4_u = {
-                                       .daddr = cp->daddr,
+                                       .daddr = cp->daddr.ip,
                                        .saddr = 0,
                                        .tos = rtos, } },
                };
 
                if (ip_route_output_key(&init_net, &rt, &fl)) {
                        IP_VS_DBG_RL("ip_route_output error, dest: "
-                                    "%u.%u.%u.%u\n", NIPQUAD(cp->daddr));
+                                    "%u.%u.%u.%u\n", NIPQUAD(cp->daddr.ip));
                        return NULL;
                }
        }
@@ -109,6 +113,70 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
        return rt;
 }
 
+#ifdef CONFIG_IP_VS_IPV6 /* IPv6 counterpart of __ip_vs_get_out_rt() */
+static struct rt6_info *               /* NULL when the lookup fails */
+__ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
+{
+       struct rt6_info *rt;                    /* Route to the other host */
+       struct ip_vs_dest *dest = cp->dest;     /* NULL: route by cp->daddr */
+
+       if (dest) {
+               spin_lock(&dest->dst_lock);     /* held across cache check and update */
+               rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0);
+               if (!rt) {                      /* nothing usable cached: fresh lookup */
+                       struct flowi fl = {
+                               .oif = 0,
+                               .nl_u = {
+                                       .ip6_u = {
+                                               .daddr = dest->addr.in6,
+                                               .saddr = {
+                                                       .s6_addr32 =
+                                                               { 0, 0, 0, 0 },
+                                               },
+                                       },
+                               },
+                       };
+
+                       rt = (struct rt6_info *)ip6_route_output(&init_net,
+                                                                NULL, &fl);
+                       if (!rt) {      /* NOTE(review): confirm failure is NULL, not a dst with ->error set */
+                               spin_unlock(&dest->dst_lock);
+                               IP_VS_DBG_RL("ip6_route_output error, "
+                                            "dest: " NIP6_FMT "\n",
+                                            NIP6(dest->addr.in6));
+                               return NULL;
+                       }
+                       __ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst)); /* extra ref goes to dest */
+                       IP_VS_DBG(10, "new dst " NIP6_FMT ", refcnt=%d\n",
+                                 NIP6(dest->addr.in6),
+                                 atomic_read(&rt->u.dst.__refcnt));
+               }
+               spin_unlock(&dest->dst_lock);
+       } else {                                /* no dest bound: uncached lookup */
+               struct flowi fl = {
+                       .oif = 0,
+                       .nl_u = {
+                               .ip6_u = {
+                                       .daddr = cp->daddr.in6,
+                                       .saddr = {
+                                               .s6_addr32 = { 0, 0, 0, 0 },
+                                       },
+                               },
+                       },
+               };
+
+               rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+               if (!rt) {      /* NOTE(review): same NULL-vs-dst->error question as above */
+                       IP_VS_DBG_RL("ip6_route_output error, dest: "
+                                    NIP6_FMT "\n", NIP6(cp->daddr.in6));
+                       return NULL;
+               }
+       }
+
+       return rt;      /* callers either attach it to skb->dst or dst_release() it */
+}
+#endif /* CONFIG_IP_VS_IPV6 */
+
 
 /*
  *     Release dest->dst_cache before a dest is removed
@@ -123,11 +191,11 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
        dst_release(old_dst);
 }
 
-#define IP_VS_XMIT(skb, rt)                            \
+#define IP_VS_XMIT(pf, skb, rt)                                \
 do {                                                   \
        (skb)->ipvs_property = 1;                       \
        skb_forward_csum(skb);                          \
-       NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, (skb), NULL,        \
+       NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,     \
                (rt)->u.dst.dev, dst_output);           \
 } while (0)
 
@@ -200,7 +268,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
        /* Another hack: avoid icmp_send in ip_fragment */
        skb->local_df = 1;
 
-       IP_VS_XMIT(skb, rt);
+       IP_VS_XMIT(PF_INET, skb, rt);
 
        LeaveFunction(10);
        return NF_STOLEN;
@@ -213,6 +281,70 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
        return NF_STOLEN;
 }
 
+#ifdef CONFIG_IP_VS_IPV6 /* IPv6 counterpart of ip_vs_bypass_xmit() */
+int /* NF_STOLEN on every path */
+ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+                    struct ip_vs_protocol *pp) /* cp and pp are not used here */
+{
+       struct rt6_info *rt;                    /* Route to the other host */
+       struct ipv6hdr  *iph = ipv6_hdr(skb);
+       int    mtu;
+       struct flowi fl = {                     /* route by the packet's own daddr */
+               .oif = 0,
+               .nl_u = {
+                       .ip6_u = {
+                               .daddr = iph->daddr,
+                               .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
+       };
+
+       EnterFunction(10);
+
+       rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+       if (!rt) {      /* NOTE(review): confirm failure is NULL, not a dst with ->error set */
+               IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, "
+                            "dest: " NIP6_FMT "\n", NIP6(iph->daddr));
+               goto tx_error_icmp;
+       }
+
+       /* MTU checking */
+       mtu = dst_mtu(&rt->u.dst);
+       if (skb->len > mtu) {   /* too big: report Packet Too Big and drop */
+               dst_release(&rt->u.dst);
+               icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+               IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): frag needed\n");
+               goto tx_error;
+       }
+
+       /*
+        * IPv6 has no header checksum, so no ip_send_check() here; just
+        * make sure the skb is not shared. Is copy-on-write needed?
+        */
+       skb = skb_share_check(skb, GFP_ATOMIC);
+       if (unlikely(skb == NULL)) {
+               dst_release(&rt->u.dst);        /* rt was never attached to an skb */
+               return NF_STOLEN;
+       }
+
+       /* drop old route */
+       dst_release(skb->dst);
+       skb->dst = &rt->u.dst;  /* our rt reference now travels with the skb */
+
+       /* Another hack: avoid icmp_send in ip_fragment */
+       skb->local_df = 1;
+
+       IP_VS_XMIT(PF_INET6, skb, rt);
+
+       LeaveFunction(10);
+       return NF_STOLEN;
+
+ tx_error_icmp:
+       dst_link_failure(skb);  /* route lookup failed */
+ tx_error:
+       kfree_skb(skb);
+       LeaveFunction(10);
+       return NF_STOLEN;
+}
+#endif /* CONFIG_IP_VS_IPV6 */
 
 /*
  *      NAT transmitter (only for outside-to-inside nat forwarding)
@@ -264,7 +396,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
        /* mangle the packet */
        if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
                goto tx_error;
-       ip_hdr(skb)->daddr = cp->daddr;
+       ip_hdr(skb)->daddr = cp->daddr.ip;
        ip_send_check(ip_hdr(skb));
 
        IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
@@ -276,7 +408,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
        /* Another hack: avoid icmp_send in ip_fragment */
        skb->local_df = 1;
 
-       IP_VS_XMIT(skb, rt);
+       IP_VS_XMIT(PF_INET, skb, rt);
 
        LeaveFunction(10);
        return NF_STOLEN;
@@ -292,6 +424,83 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
        goto tx_error;
 }
 
+#ifdef CONFIG_IP_VS_IPV6 /* IPv6 counterpart of ip_vs_nat_xmit() */
+int /* NF_STOLEN on every path */
+ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+                 struct ip_vs_protocol *pp)
+{
+       struct rt6_info *rt;            /* Route to the other host */
+       int mtu;
+
+       EnterFunction(10);
+
+       /* check if it is a connection of no-client-port */
+       if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
+               __be16 _pt, *p;
+               p = skb_header_pointer(skb, sizeof(struct ipv6hdr), /* 16 bits right after the fixed header */
+                                      sizeof(_pt), &_pt);
+               if (p == NULL)
+                       goto tx_error;
+               ip_vs_conn_fill_cport(cp, *p);  /* NOTE(review): assumes no extension headers precede the ports */
+               IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
+       }
+
+       rt = __ip_vs_get_out_rt_v6(cp);
+       if (!rt)
+               goto tx_error_icmp;
+
+       /* MTU checking */
+       mtu = dst_mtu(&rt->u.dst);
+       if (skb->len > mtu) {   /* too big: report Packet Too Big and drop */
+               dst_release(&rt->u.dst);
+               icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+               IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+                                "ip_vs_nat_xmit_v6(): frag needed for");
+               goto tx_error;
+       }
+
+       /* copy-on-write the packet before mangling it */
+       if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+               goto tx_error_put;      /* rt not attached yet: must be released */
+
+       if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+               goto tx_error_put;
+
+       /* drop old route */
+       dst_release(skb->dst);
+       skb->dst = &rt->u.dst;  /* our rt reference now travels with the skb */
+
+       /* mangle the packet */
+       if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
+               goto tx_error;  /* not tx_error_put: rt already belongs to skb */
+       ipv6_hdr(skb)->daddr = cp->daddr.in6;   /* no header checksum to refresh in IPv6 */
+
+       IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
+
+       /* FIXME: when application helper enlarges the packet and the length
+          is larger than the MTU of outgoing device, there will be still
+          MTU problem. */
+
+       /* Another hack: avoid icmp_send in ip_fragment */
+       skb->local_df = 1;
+
+       IP_VS_XMIT(PF_INET6, skb, rt);
+
+       LeaveFunction(10);
+       return NF_STOLEN;
+
+tx_error_icmp:
+       dst_link_failure(skb);  /* route lookup failed */
+tx_error:
+       LeaveFunction(10);
+       kfree_skb(skb);
+       return NF_STOLEN;
+tx_error_put:
+       dst_release(&rt->u.dst);        /* drop the reference taken by the lookup */
+       goto tx_error;
+}
+#endif /* CONFIG_IP_VS_IPV6 */
+
 
 /*
  *   IP Tunneling transmitter
@@ -423,6 +632,112 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
        return NF_STOLEN;
 }
 
+#ifdef CONFIG_IP_VS_IPV6 /* IPv6 counterpart of ip_vs_tunnel_xmit(): IPv6-in-IPv6 */
+int /* NF_STOLEN on every path */
+ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+                    struct ip_vs_protocol *pp) /* pp is not used here */
+{
+       struct rt6_info *rt;            /* Route to the other host */
+       struct net_device *tdev;        /* Device to other host */
+       struct ipv6hdr  *old_iph = ipv6_hdr(skb);
+       sk_buff_data_t old_transport_header = skb->transport_header;
+       struct ipv6hdr  *iph;           /* Our new IP header */
+       unsigned int max_headroom;      /* The extra header space needed */
+       int    mtu;
+
+       EnterFunction(10);
+
+       if (skb->protocol != htons(ETH_P_IPV6)) {
+               IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): protocol error, "
+                            "ETH_P_IPV6: %d, skb protocol: %d\n",
+                            htons(ETH_P_IPV6), skb->protocol);
+               goto tx_error;
+       }
+
+       rt = __ip_vs_get_out_rt_v6(cp);
+       if (!rt)
+               goto tx_error_icmp;
+
+       tdev = rt->u.dst.dev;
+
+       mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr); /* room left after the outer header */
+       /* TODO IPv6: do we need this check in IPv6? */
+       if (mtu < 1280) {
+               dst_release(&rt->u.dst);
+               IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): mtu less than 1280\n");
+               goto tx_error;
+       }
+       if (skb->dst)
+               skb->dst->ops->update_pmtu(skb->dst, mtu);
+
+       if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { /* inner packet does not fit */
+               icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+               dst_release(&rt->u.dst);
+               IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): frag needed\n");
+               goto tx_error;
+       }
+
+       /*
+        * Okay, now see if we can stuff it in the buffer as-is.
+        */
+       max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
+
+       if (skb_headroom(skb) < max_headroom
+           || skb_cloned(skb) || skb_shared(skb)) {
+               struct sk_buff *new_skb =
+                       skb_realloc_headroom(skb, max_headroom);
+               if (!new_skb) {
+                       dst_release(&rt->u.dst);
+                       kfree_skb(skb);
+                       IP_VS_ERR_RL("ip_vs_tunnel_xmit_v6(): no memory\n");
+                       return NF_STOLEN;
+               }
+               kfree_skb(skb);
+               skb = new_skb;
+               old_iph = ipv6_hdr(skb);        /* re-read: the header lives in the new buffer */
+       }
+
+       skb->transport_header = old_transport_header;
+
+       skb_push(skb, sizeof(struct ipv6hdr));  /* make room for the outer header */
+       skb_reset_network_header(skb);          /* ipv6_hdr(skb) is now the outer header */
+       memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
+       /* drop old route */
+       dst_release(skb->dst);
+       skb->dst = &rt->u.dst;  /* our rt reference now travels with the skb */
+
+       /*
+        *      Install the outer IPv6 header; payload = whole inner packet.
+        */
+       iph                     =       ipv6_hdr(skb);
+       iph->version            =       6;
+       iph->nexthdr            =       IPPROTO_IPV6;
+       iph->payload_len        =       htons(ntohs(old_iph->payload_len) + sizeof(*old_iph)); /* big-endian field; add the header size, not the pointer size */
+       iph->priority           =       old_iph->priority;
+       memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
+       iph->daddr              =       rt->rt6i_dst.addr;
+       iph->saddr              =       cp->vaddr.in6; /* rt->rt6i_src.addr; */
+       iph->hop_limit          =       old_iph->hop_limit;
+
+       /* Another hack: avoid icmp_send in ip_fragment */
+       skb->local_df = 1;
+
+       ip6_local_out(skb);
+
+       LeaveFunction(10);
+
+       return NF_STOLEN;
+
+tx_error_icmp:
+       dst_link_failure(skb);  /* route lookup failed */
+tx_error:
+       kfree_skb(skb);
+       LeaveFunction(10);
+       return NF_STOLEN;
+}
+#endif /* CONFIG_IP_VS_IPV6 */
+
 
 /*
  *      Direct Routing transmitter
@@ -467,7 +782,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
        /* Another hack: avoid icmp_send in ip_fragment */
        skb->local_df = 1;
 
-       IP_VS_XMIT(skb, rt);
+       IP_VS_XMIT(PF_INET, skb, rt);
 
        LeaveFunction(10);
        return NF_STOLEN;
@@ -480,6 +795,60 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
        return NF_STOLEN;
 }
 
+#ifdef CONFIG_IP_VS_IPV6 /* IPv6 counterpart of ip_vs_dr_xmit() */
+int /* NF_STOLEN on every path */
+ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+                struct ip_vs_protocol *pp) /* pp is not used here */
+{
+       struct rt6_info *rt;                    /* Route to the other host */
+       int    mtu;
+
+       EnterFunction(10);
+
+       rt = __ip_vs_get_out_rt_v6(cp);
+       if (!rt)
+               goto tx_error_icmp;
+
+       /* MTU checking */
+       mtu = dst_mtu(&rt->u.dst);
+       if (skb->len > mtu) {   /* too big: report Packet Too Big and drop */
+               icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+               dst_release(&rt->u.dst);
+               IP_VS_DBG_RL("ip_vs_dr_xmit_v6(): frag needed\n");
+               goto tx_error;
+       }
+
+       /*
+        * IPv6 has no header checksum, so no ip_send_check() here; just
+        * make sure the skb is not shared. Is copy-on-write needed?
+        */
+       skb = skb_share_check(skb, GFP_ATOMIC);
+       if (unlikely(skb == NULL)) {
+               dst_release(&rt->u.dst);        /* rt was never attached to an skb */
+               return NF_STOLEN;
+       }
+
+       /* drop old route */
+       dst_release(skb->dst);
+       skb->dst = &rt->u.dst;  /* our rt reference now travels with the skb */
+
+       /* Another hack: avoid icmp_send in ip_fragment */
+       skb->local_df = 1;
+
+       IP_VS_XMIT(PF_INET6, skb, rt);
+
+       LeaveFunction(10);
+       return NF_STOLEN;
+
+tx_error_icmp:
+       dst_link_failure(skb);  /* route lookup failed */
+tx_error:
+       kfree_skb(skb);
+       LeaveFunction(10);
+       return NF_STOLEN;
+}
+#endif /* CONFIG_IP_VS_IPV6 */
+
 
 /*
  *     ICMP packet transmitter
@@ -540,7 +909,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
        /* Another hack: avoid icmp_send in ip_fragment */
        skb->local_df = 1;
 
-       IP_VS_XMIT(skb, rt);
+       IP_VS_XMIT(PF_INET, skb, rt);
 
        rc = NF_STOLEN;
        goto out;
@@ -557,3 +926,79 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
        ip_rt_put(rt);
        goto tx_error;
 }
+
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ *     ICMP packet transmitter (IPv6)
+ *
+ *     For every forwarding method other than IP_VS_CONN_F_MASQ the packet
+ *     is passed to cp->packet_xmit() as is.  For IP_VS_CONN_F_MASQ the
+ *     packet is made writable, mangled by ip_vs_nat_icmp_v6() and sent
+ *     over the route returned by __ip_vs_get_out_rt_v6().
+ *
+ *     skb:    ICMP packet to forward
+ *     cp:     connection the packet is related to
+ *     pp:     protocol handler, passed through to cp->packet_xmit() and
+ *             ip_vs_nat_icmp_v6()
+ *     offset: number of leading bytes that skb_make_writable() must make
+ *             writable before the packet is mangled
+ *
+ *     Returns the verdict of cp->packet_xmit() on the non-MASQ path, or
+ *     NF_ACCEPT if the connection has no packet_xmit handler.  The MASQ
+ *     path always returns NF_STOLEN: the skb has either been handed to
+ *     IP_VS_XMIT() or freed.
+ */
+int
+ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+               struct ip_vs_protocol *pp, int offset)
+{
+       struct rt6_info *rt;    /* Route to the other host */
+       int mtu;
+       int rc;
+
+       EnterFunction(10);
+
+       /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
+          forwarded directly here, because there is no need to
+          translate address/port back */
+       if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
+               if (cp->packet_xmit)
+                       rc = cp->packet_xmit(skb, cp, pp);
+               else
+                       rc = NF_ACCEPT;
+               /* do not touch skb anymore */
+               atomic_inc(&cp->in_pkts);
+               goto out;
+       }
+
+       /*
+        * mangle and send the packet here (only for VS/NAT)
+        */
+
+       rt = __ip_vs_get_out_rt_v6(cp);
+       if (!rt)
+               goto tx_error_icmp;
+
+       /* MTU checking */
+       mtu = dst_mtu(&rt->u.dst);
+       if (skb->len > mtu) {
+               /* route is released here, so leave through tx_error */
+               dst_release(&rt->u.dst);
+               icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+               IP_VS_DBG_RL("ip_vs_icmp_xmit_v6(): frag needed\n");
+               goto tx_error;
+       }
+
+       /* copy-on-write the packet before mangling it */
+       if (!skb_make_writable(skb, offset))
+               goto tx_error_put;
+
+       /* make sure there is headroom for the link-layer header */
+       if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+               goto tx_error_put;
+
+       /* drop the old route when skb is not shared */
+       dst_release(skb->dst);
+       skb->dst = &rt->u.dst;
+
+       ip_vs_nat_icmp_v6(skb, pp, cp, 0);
+
+       /* Another hack: avoid icmp_send in ip_fragment */
+       skb->local_df = 1;
+
+       IP_VS_XMIT(PF_INET6, skb, rt);
+
+       rc = NF_STOLEN;
+       goto out;
+
+tx_error_icmp:
+       /* no route to the real server */
+       dst_link_failure(skb);
+tx_error:
+       dev_kfree_skb(skb);
+       rc = NF_STOLEN;
+out:
+       LeaveFunction(10);
+       return rc;
+tx_error_put:
+       /* route still held and not yet attached to the skb: release it */
+       dst_release(&rt->u.dst);
+       goto tx_error;
+}
+#endif