netfilter: nf_conntrack: log packets dropped by helpers
[safe/jmp/linux-2.6] / net / ipv4 / netfilter / nf_conntrack_l3proto_ipv4.c
index 64552af..aa95bb8 100644
@@ -1,3 +1,4 @@
+
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
  *
 #include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#include <net/netfilter/nf_log.h>
+
+int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
+                             struct nf_conn *ct,
+                             enum ip_conntrack_info ctinfo);
+EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook);
 
-static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
-                            struct nf_conntrack_tuple *tuple)
+static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
+                             struct nf_conntrack_tuple *tuple)
 {
-       __be32 _addrs[2], *ap;
+       const __be32 *ap;
+       __be32 _addrs[2];
        ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
                                sizeof(u_int32_t) * 2, _addrs);
        if (ap == NULL)
-               return 0;
+               return false;
 
        tuple->src.u3.ip = ap[0];
        tuple->dst.u3.ip = ap[1];
 
-       return 1;
+       return true;
 }
 
-static int ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
-                          const struct nf_conntrack_tuple *orig)
+static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
+                             const struct nf_conntrack_tuple *orig)
 {
        tuple->src.u3.ip = orig->dst.u3.ip;
        tuple->dst.u3.ip = orig->src.u3.ip;
 
-       return 1;
+       return true;
 }
 
 static int ipv4_print_tuple(struct seq_file *s,
                            const struct nf_conntrack_tuple *tuple)
 {
-       return seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
-                         NIPQUAD(tuple->src.u3.ip),
-                         NIPQUAD(tuple->dst.u3.ip));
-}
-
-static int ipv4_print_conntrack(struct seq_file *s,
-                               const struct nf_conn *conntrack)
-{
-       return 0;
-}
-
-/* Returns new sk_buff, or NULL */
-static struct sk_buff *
-nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
-{
-       skb_orphan(skb);
-
-       local_bh_disable();
-       skb = ip_defrag(skb, user);
-       local_bh_enable();
-
-       if (skb)
-               ip_send_check(ip_hdr(skb));
-
-       return skb;
+       return seq_printf(s, "src=%pI4 dst=%pI4 ",
+                         &tuple->src.u3.ip, &tuple->dst.u3.ip);
 }
 
 static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
                            unsigned int *dataoff, u_int8_t *protonum)
 {
-       struct iphdr _iph, *iph;
+       const struct iphdr *iph;
+       struct iphdr _iph;
 
        iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
        if (iph == NULL)
                return -NF_DROP;
 
-       /* Never happen */
-       if (iph->frag_off & htons(IP_OFFSET)) {
-               if (net_ratelimit()) {
-                       printk(KERN_ERR "ipv4_get_l4proto: Frag of proto %u\n",
-                       iph->protocol);
-               }
+       /* Conntrack defragments packets, we might still see fragments
+        * inside ICMP packets though. */
+       if (iph->frag_off & htons(IP_OFFSET))
                return -NF_DROP;
-       }
 
        *dataoff = nhoff + (iph->ihl << 2);
        *protonum = iph->protocol;
@@ -103,147 +87,104 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 }
 
 static unsigned int ipv4_confirm(unsigned int hooknum,
-                                struct sk_buff **pskb,
+                                struct sk_buff *skb,
                                 const struct net_device *in,
                                 const struct net_device *out,
                                 int (*okfn)(struct sk_buff *))
 {
-       /* We've seen it coming out the other side: confirm it */
-       return nf_conntrack_confirm(pskb);
-}
-
-static unsigned int ipv4_conntrack_help(unsigned int hooknum,
-                                     struct sk_buff **pskb,
-                                     const struct net_device *in,
-                                     const struct net_device *out,
-                                     int (*okfn)(struct sk_buff *))
-{
        struct nf_conn *ct;
        enum ip_conntrack_info ctinfo;
-       struct nf_conn_help *help;
-       struct nf_conntrack_helper *helper;
+       const struct nf_conn_help *help;
+       const struct nf_conntrack_helper *helper;
+       unsigned int ret;
 
        /* This is where we call the helper: as the packet goes out. */
-       ct = nf_ct_get(*pskb, &ctinfo);
+       ct = nf_ct_get(skb, &ctinfo);
        if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
-               return NF_ACCEPT;
+               goto out;
 
        help = nfct_help(ct);
        if (!help)
-               return NF_ACCEPT;
+               goto out;
+
        /* rcu_read_lock()ed by nf_hook_slow */
        helper = rcu_dereference(help->helper);
        if (!helper)
-               return NF_ACCEPT;
-       return helper->help(pskb, skb_network_offset(*pskb) + ip_hdrlen(*pskb),
-                           ct, ctinfo);
-}
+               goto out;
 
-static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
-                                         struct sk_buff **pskb,
-                                         const struct net_device *in,
-                                         const struct net_device *out,
-                                         int (*okfn)(struct sk_buff *))
-{
-       /* Previously seen (loopback)?  Ignore.  Do this before
-          fragment check. */
-       if ((*pskb)->nfct)
-               return NF_ACCEPT;
+       ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
+                          ct, ctinfo);
+       if (ret != NF_ACCEPT) {
+               nf_log_packet(NFPROTO_IPV4, hooknum, skb, in, out, NULL,
+                             "nf_ct_%s: dropping packet", helper->name);
+               return ret;
+       }
 
-       /* Gather fragments. */
-       if (ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)) {
-               *pskb = nf_ct_ipv4_gather_frags(*pskb,
-                                               hooknum == NF_IP_PRE_ROUTING ?
-                                               IP_DEFRAG_CONNTRACK_IN :
-                                               IP_DEFRAG_CONNTRACK_OUT);
-               if (!*pskb)
-                       return NF_STOLEN;
+       if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
+               typeof(nf_nat_seq_adjust_hook) seq_adjust;
+
+               seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
+               if (!seq_adjust || !seq_adjust(skb, ct, ctinfo)) {
+                       NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
+                       return NF_DROP;
+               }
        }
-       return NF_ACCEPT;
+out:
+       /* We've seen it coming out the other side: confirm it */
+       return nf_conntrack_confirm(skb);
 }
 
 static unsigned int ipv4_conntrack_in(unsigned int hooknum,
-                                     struct sk_buff **pskb,
+                                     struct sk_buff *skb,
                                      const struct net_device *in,
                                      const struct net_device *out,
                                      int (*okfn)(struct sk_buff *))
 {
-       return nf_conntrack_in(PF_INET, hooknum, pskb);
+       return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb);
 }
 
 static unsigned int ipv4_conntrack_local(unsigned int hooknum,
-                                        struct sk_buff **pskb,
+                                        struct sk_buff *skb,
                                         const struct net_device *in,
                                         const struct net_device *out,
                                         int (*okfn)(struct sk_buff *))
 {
        /* root is playing with raw sockets. */
-       if ((*pskb)->len < sizeof(struct iphdr)
-           || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
-               if (net_ratelimit())
-                       printk("ipt_hook: happy cracking.\n");
+       if (skb->len < sizeof(struct iphdr) ||
+           ip_hdrlen(skb) < sizeof(struct iphdr))
                return NF_ACCEPT;
-       }
-       return nf_conntrack_in(PF_INET, hooknum, pskb);
+       return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb);
 }
 
 /* Connection tracking may drop packets, but never alters them, so
    make it the first hook. */
-static struct nf_hook_ops ipv4_conntrack_ops[] = {
-       {
-               .hook           = ipv4_conntrack_defrag,
-               .owner          = THIS_MODULE,
-               .pf             = PF_INET,
-               .hooknum        = NF_IP_PRE_ROUTING,
-               .priority       = NF_IP_PRI_CONNTRACK_DEFRAG,
-       },
+static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
        {
                .hook           = ipv4_conntrack_in,
                .owner          = THIS_MODULE,
-               .pf             = PF_INET,
-               .hooknum        = NF_IP_PRE_ROUTING,
+               .pf             = NFPROTO_IPV4,
+               .hooknum        = NF_INET_PRE_ROUTING,
                .priority       = NF_IP_PRI_CONNTRACK,
        },
        {
-               .hook           = ipv4_conntrack_defrag,
-               .owner          = THIS_MODULE,
-               .pf             = PF_INET,
-               .hooknum        = NF_IP_LOCAL_OUT,
-               .priority       = NF_IP_PRI_CONNTRACK_DEFRAG,
-       },
-       {
                .hook           = ipv4_conntrack_local,
                .owner          = THIS_MODULE,
-               .pf             = PF_INET,
-               .hooknum        = NF_IP_LOCAL_OUT,
+               .pf             = NFPROTO_IPV4,
+               .hooknum        = NF_INET_LOCAL_OUT,
                .priority       = NF_IP_PRI_CONNTRACK,
        },
        {
-               .hook           = ipv4_conntrack_help,
-               .owner          = THIS_MODULE,
-               .pf             = PF_INET,
-               .hooknum        = NF_IP_POST_ROUTING,
-               .priority       = NF_IP_PRI_CONNTRACK_HELPER,
-       },
-       {
-               .hook           = ipv4_conntrack_help,
-               .owner          = THIS_MODULE,
-               .pf             = PF_INET,
-               .hooknum        = NF_IP_LOCAL_IN,
-               .priority       = NF_IP_PRI_CONNTRACK_HELPER,
-       },
-       {
                .hook           = ipv4_confirm,
                .owner          = THIS_MODULE,
-               .pf             = PF_INET,
-               .hooknum        = NF_IP_POST_ROUTING,
+               .pf             = NFPROTO_IPV4,
+               .hooknum        = NF_INET_POST_ROUTING,
                .priority       = NF_IP_PRI_CONNTRACK_CONFIRM,
        },
        {
                .hook           = ipv4_confirm,
                .owner          = THIS_MODULE,
-               .pf             = PF_INET,
-               .hooknum        = NF_IP_LOCAL_IN,
+               .pf             = NFPROTO_IPV4,
+               .hooknum        = NF_INET_LOCAL_IN,
                .priority       = NF_IP_PRI_CONNTRACK_CONFIRM,
        },
 };
@@ -259,15 +200,15 @@ static ctl_table ip_ct_sysctl_table[] = {
                .data           = &nf_conntrack_max,
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = proc_dointvec,
        },
        {
                .ctl_name       = NET_IPV4_NF_CONNTRACK_COUNT,
                .procname       = "ip_conntrack_count",
-               .data           = &nf_conntrack_count,
+               .data           = &init_net.ct.count,
                .maxlen         = sizeof(int),
                .mode           = 0444,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = proc_dointvec,
        },
        {
                .ctl_name       = NET_IPV4_NF_CONNTRACK_BUCKETS,
@@ -275,24 +216,24 @@ static ctl_table ip_ct_sysctl_table[] = {
                .data           = &nf_conntrack_htable_size,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0444,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = proc_dointvec,
        },
        {
                .ctl_name       = NET_IPV4_NF_CONNTRACK_CHECKSUM,
                .procname       = "ip_conntrack_checksum",
-               .data           = &nf_conntrack_checksum,
+               .data           = &init_net.ct.sysctl_checksum,
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = proc_dointvec,
        },
        {
                .ctl_name       = NET_IPV4_NF_CONNTRACK_LOG_INVALID,
                .procname       = "ip_conntrack_log_invalid",
-               .data           = &nf_ct_log_invalid,
+               .data           = &init_net.ct.sysctl_log_invalid,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_minmax,
-               .strategy       = &sysctl_intvec,
+               .proc_handler   = proc_dointvec_minmax,
+               .strategy       = sysctl_intvec,
                .extra1         = &log_invalid_proto_min,
                .extra2         = &log_invalid_proto_max,
        },
@@ -309,21 +250,21 @@ static ctl_table ip_ct_sysctl_table[] = {
 static int
 getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 {
-       struct inet_sock *inet = inet_sk(sk);
-       struct nf_conntrack_tuple_hash *h;
+       const struct inet_sock *inet = inet_sk(sk);
+       const struct nf_conntrack_tuple_hash *h;
        struct nf_conntrack_tuple tuple;
 
-       NF_CT_TUPLE_U_BLANK(&tuple);
+       memset(&tuple, 0, sizeof(tuple));
        tuple.src.u3.ip = inet->rcv_saddr;
        tuple.src.u.tcp.port = inet->sport;
        tuple.dst.u3.ip = inet->daddr;
        tuple.dst.u.tcp.port = inet->dport;
        tuple.src.l3num = PF_INET;
-       tuple.dst.protonum = IPPROTO_TCP;
+       tuple.dst.protonum = sk->sk_protocol;
 
-       /* We only do TCP at the moment: is there a better way? */
-       if (strcmp(sk->sk_prot->name, "TCP")) {
-               pr_debug("SO_ORIGINAL_DST: Not a TCP socket\n");
+       /* We only do TCP and SCTP at the moment: is there a better way? */
+       if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) {
+               pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
                return -ENOPROTOOPT;
        }
 
@@ -333,7 +274,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
                return -EINVAL;
        }
 
-       h = nf_conntrack_find_get(&tuple);
+       h = nf_conntrack_find_get(sock_net(sk), &tuple);
        if (h) {
                struct sockaddr_in sin;
                struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
@@ -345,17 +286,17 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
                        .tuple.dst.u3.ip;
                memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
 
-               pr_debug("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
-                        NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
+               pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
+                        &sin.sin_addr.s_addr, ntohs(sin.sin_port));
                nf_ct_put(ct);
                if (copy_to_user(user, &sin, sizeof(sin)) != 0)
                        return -EFAULT;
                else
                        return 0;
        }
-       pr_debug("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
-                NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port),
-                NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port));
+       pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
+                &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port),
+                &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port));
        return -ENOENT;
 }
 
@@ -364,38 +305,38 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_conntrack.h>
 
-static int ipv4_tuple_to_nfattr(struct sk_buff *skb,
+static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
                                const struct nf_conntrack_tuple *tuple)
 {
-       NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t),
-               &tuple->src.u3.ip);
-       NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t),
-               &tuple->dst.u3.ip);
+       NLA_PUT_BE32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip);
+       NLA_PUT_BE32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip);
        return 0;
 
-nfattr_failure:
+nla_put_failure:
        return -1;
 }
 
-static const size_t cta_min_ip[CTA_IP_MAX] = {
-       [CTA_IP_V4_SRC-1]       = sizeof(u_int32_t),
-       [CTA_IP_V4_DST-1]       = sizeof(u_int32_t),
+static const struct nla_policy ipv4_nla_policy[CTA_IP_MAX+1] = {
+       [CTA_IP_V4_SRC] = { .type = NLA_U32 },
+       [CTA_IP_V4_DST] = { .type = NLA_U32 },
 };
 
-static int ipv4_nfattr_to_tuple(struct nfattr *tb[],
+static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
                                struct nf_conntrack_tuple *t)
 {
-       if (!tb[CTA_IP_V4_SRC-1] || !tb[CTA_IP_V4_DST-1])
+       if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST])
                return -EINVAL;
 
-       if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
-               return -EINVAL;
-
-       t->src.u3.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
-       t->dst.u3.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
+       t->src.u3.ip = nla_get_be32(tb[CTA_IP_V4_SRC]);
+       t->dst.u3.ip = nla_get_be32(tb[CTA_IP_V4_DST]);
 
        return 0;
 }
+
+static int ipv4_nlattr_tuple_size(void)
+{
+       return nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1);
+}
 #endif
 
 static struct nf_sockopt_ops so_getorigdst = {
@@ -403,6 +344,7 @@ static struct nf_sockopt_ops so_getorigdst = {
        .get_optmin     = SO_ORIGINAL_DST,
        .get_optmax     = SO_ORIGINAL_DST+1,
        .get            = &getorigdst,
+       .owner          = THIS_MODULE,
 };
 
 struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
@@ -411,11 +353,12 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
        .pkt_to_tuple    = ipv4_pkt_to_tuple,
        .invert_tuple    = ipv4_invert_tuple,
        .print_tuple     = ipv4_print_tuple,
-       .print_conntrack = ipv4_print_conntrack,
        .get_l4proto     = ipv4_get_l4proto,
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
-       .tuple_to_nfattr = ipv4_tuple_to_nfattr,
-       .nfattr_to_tuple = ipv4_nfattr_to_tuple,
+       .tuple_to_nlattr = ipv4_tuple_to_nlattr,
+       .nlattr_tuple_size = ipv4_nlattr_tuple_size,
+       .nlattr_to_tuple = ipv4_nlattr_to_tuple,
+       .nla_policy      = ipv4_nla_policy,
 #endif
 #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
        .ctl_table_path  = nf_net_ipv4_netfilter_sysctl_path,
@@ -424,6 +367,9 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
        .me              = THIS_MODULE,
 };
 
+module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
+                 &nf_conntrack_htable_size, 0600);
+
 MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
 MODULE_ALIAS("ip_conntrack");
 MODULE_LICENSE("GPL");
@@ -433,6 +379,7 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
        int ret = 0;
 
        need_conntrack();
+       nf_defrag_ipv4_enable();
 
        ret = nf_register_sockopt(&so_getorigdst);
        if (ret < 0) {
@@ -509,3 +456,9 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void)
 
 module_init(nf_conntrack_l3proto_ipv4_init);
 module_exit(nf_conntrack_l3proto_ipv4_fini);
+
+void need_ipv4_conntrack(void)
+{
+       return;
+}
+EXPORT_SYMBOL_GPL(need_ipv4_conntrack);