[SK_BUFF]: Introduce ip_hdr(), remove skb->nh.iph
[safe/jmp/linux-2.6] / net / ipv4 / netfilter / ip_conntrack_core.c
index 2e6e421..986c0c8 100644 (file)
@@ -303,6 +303,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
        struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
        struct ip_conntrack_protocol *proto;
        struct ip_conntrack_helper *helper;
+       typeof(ip_conntrack_destroyed) destroyed;
 
        DEBUGP("destroy_conntrack(%p)\n", ct);
        IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
@@ -318,12 +319,16 @@ destroy_conntrack(struct nf_conntrack *nfct)
        /* To make sure we don't get any weird locking issues here:
         * destroy_conntrack() MUST NOT be called with a write lock
         * to ip_conntrack_lock!!! -HW */
+       rcu_read_lock();
        proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
        if (proto && proto->destroy)
                proto->destroy(ct);
 
-       if (ip_conntrack_destroyed)
-               ip_conntrack_destroyed(ct);
+       destroyed = rcu_dereference(ip_conntrack_destroyed);
+       if (destroyed)
+               destroyed(ct);
+
+       rcu_read_unlock();
 
        write_lock_bh(&ip_conntrack_lock);
        /* Expectations will have been removed in clean_from_lists,
@@ -536,7 +541,7 @@ static int early_drop(struct list_head *chain)
        if (del_timer(&ct->timeout)) {
                death_by_timeout((unsigned long)ct);
                dropped = 1;
-               CONNTRACK_STAT_INC(early_drop);
+               CONNTRACK_STAT_INC_ATOMIC(early_drop);
        }
        ip_conntrack_put(ct);
        return dropped;
@@ -595,13 +600,13 @@ ip_conntrack_proto_find_get(u_int8_t protocol)
 {
        struct ip_conntrack_protocol *p;
 
-       preempt_disable();
+       rcu_read_lock();
        p = __ip_conntrack_proto_find(protocol);
        if (p) {
                if (!try_module_get(p->me))
                        p = &ip_conntrack_generic_protocol;
        }
-       preempt_enable();
+       rcu_read_unlock();
 
        return p;
 }
@@ -638,14 +643,13 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
                }
        }
 
-       conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
+       conntrack = kmem_cache_zalloc(ip_conntrack_cachep, GFP_ATOMIC);
        if (!conntrack) {
                DEBUGP("Can't allocate conntrack.\n");
                atomic_dec(&ip_conntrack_count);
                return ERR_PTR(-ENOMEM);
        }
 
-       memset(conntrack, 0, sizeof(*conntrack));
        atomic_set(&conntrack->ct_general.use, 1);
        conntrack->ct_general.destroy = destroy_conntrack;
        conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
@@ -744,10 +748,9 @@ resolve_normal_ct(struct sk_buff *skb,
        struct ip_conntrack_tuple_hash *h;
        struct ip_conntrack *ct;
 
-       IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
+       IP_NF_ASSERT((ip_hdr(skb)->frag_off & htons(IP_OFFSET)) == 0);
 
-       if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4,
-                               &tuple,proto))
+       if (!ip_ct_get_tuple(ip_hdr(skb), skb, ip_hdrlen(skb), &tuple,proto))
                return NULL;
 
        /* look for tuple match */
@@ -803,15 +806,15 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
 
        /* Previously seen (loopback or untracked)?  Ignore. */
        if ((*pskb)->nfct) {
-               CONNTRACK_STAT_INC(ignore);
+               CONNTRACK_STAT_INC_ATOMIC(ignore);
                return NF_ACCEPT;
        }
 
        /* Never happen */
-       if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
+       if (ip_hdr(*pskb)->frag_off & htons(IP_OFFSET)) {
                if (net_ratelimit()) {
                printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
-                      (*pskb)->nh.iph->protocol, hooknum);
+                      ip_hdr(*pskb)->protocol, hooknum);
                }
                return NF_DROP;
        }
@@ -822,36 +825,37 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
        if ((*pskb)->pkt_type == PACKET_BROADCAST) {
                printk("Broadcast packet!\n");
                return NF_ACCEPT;
-       } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
+       } else if ((ip_hdr(*pskb)->daddr & htonl(0x000000FF))
                   == htonl(0x000000FF)) {
                printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
-                      NIPQUAD((*pskb)->nh.iph->saddr),
-                      NIPQUAD((*pskb)->nh.iph->daddr),
+                      NIPQUAD(ip_hdr(*pskb)->saddr),
+                      NIPQUAD(ip_hdr(*pskb)->daddr),
                       (*pskb)->sk, (*pskb)->pkt_type);
        }
 #endif
 
-       proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol);
+       /* rcu_read_lock()ed by nf_hook_slow */
+       proto = __ip_conntrack_proto_find(ip_hdr(*pskb)->protocol);
 
        /* It may be an special packet, error, unclean...
         * inverse of the return code tells to the netfilter
         * core what to do with the packet. */
        if (proto->error != NULL
            && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
-               CONNTRACK_STAT_INC(error);
-               CONNTRACK_STAT_INC(invalid);
+               CONNTRACK_STAT_INC_ATOMIC(error);
+               CONNTRACK_STAT_INC_ATOMIC(invalid);
                return -ret;
        }
 
        if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) {
                /* Not valid part of a connection */
-               CONNTRACK_STAT_INC(invalid);
+               CONNTRACK_STAT_INC_ATOMIC(invalid);
                return NF_ACCEPT;
        }
 
        if (IS_ERR(ct)) {
                /* Too stressed to deal. */
-               CONNTRACK_STAT_INC(drop);
+               CONNTRACK_STAT_INC_ATOMIC(drop);
                return NF_DROP;
        }
 
@@ -863,7 +867,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
                 * the netfilter core what to do*/
                nf_conntrack_put((*pskb)->nfct);
                (*pskb)->nfct = NULL;
-               CONNTRACK_STAT_INC(invalid);
+               CONNTRACK_STAT_INC_ATOMIC(invalid);
                return -ret;
        }
 
@@ -876,8 +880,15 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
 int invert_tuplepr(struct ip_conntrack_tuple *inverse,
                   const struct ip_conntrack_tuple *orig)
 {
-       return ip_ct_invert_tuple(inverse, orig,
-                                 __ip_conntrack_proto_find(orig->dst.protonum));
+       struct ip_conntrack_protocol *proto;
+       int ret;
+
+       rcu_read_lock();
+       proto = __ip_conntrack_proto_find(orig->dst.protonum);
+       ret = ip_ct_invert_tuple(inverse, orig, proto);
+       rcu_read_unlock();
+
+       return ret;
 }
 
 /* Would two expected things clash? */
@@ -1141,7 +1152,7 @@ void __ip_ct_refresh_acct(struct ip_conntrack *ct,
        if (do_acct) {
                ct->counters[CTINFO2DIR(ctinfo)].packets++;
                ct->counters[CTINFO2DIR(ctinfo)].bytes +=
-                                               ntohs(skb->nh.iph->tot_len);
+                                               ntohs(ip_hdr(skb)->tot_len);
                if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
                    || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
                        event |= IPCT_COUNTER_FILLING;
@@ -1199,7 +1210,7 @@ ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
        local_bh_enable();
 
        if (skb)
-               ip_send_check(skb->nh.iph);
+               ip_send_check(ip_hdr(skb));
        return skb;
 }
 
@@ -1242,7 +1253,7 @@ get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
        list_for_each_entry(h, &unconfirmed, list) {
                ct = tuplehash_to_ctrack(h);
                if (iter(ct, data))
-                       goto found;
+                       set_bit(IPS_DYING_BIT, &ct->status);
        }
        write_unlock_bh(&ip_conntrack_lock);
        return NULL;
@@ -1355,7 +1366,7 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
    supposed to kill the mall. */
 void ip_conntrack_cleanup(void)
 {
-       ip_ct_attach = NULL;
+       rcu_assign_pointer(ip_ct_attach, NULL);
 
        /* This makes sure all current packets have passed through
           netfilter framework.  Roll on, two-stage module
@@ -1508,15 +1519,15 @@ int __init ip_conntrack_init(void)
        /* Don't NEED lock here, but good form anyway. */
        write_lock_bh(&ip_conntrack_lock);
        for (i = 0; i < MAX_IP_CT_PROTO; i++)
-               ip_ct_protos[i] = &ip_conntrack_generic_protocol;
+               rcu_assign_pointer(ip_ct_protos[i], &ip_conntrack_generic_protocol);
        /* Sew in builtin protocols. */
-       ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
-       ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
-       ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
+       rcu_assign_pointer(ip_ct_protos[IPPROTO_TCP], &ip_conntrack_protocol_tcp);
+       rcu_assign_pointer(ip_ct_protos[IPPROTO_UDP], &ip_conntrack_protocol_udp);
+       rcu_assign_pointer(ip_ct_protos[IPPROTO_ICMP], &ip_conntrack_protocol_icmp);
        write_unlock_bh(&ip_conntrack_lock);
 
        /* For use by ipt_REJECT */
-       ip_ct_attach = ip_conntrack_attach;
+       rcu_assign_pointer(ip_ct_attach, ip_conntrack_attach);
 
        /* Set up fake conntrack:
            - to never be deleted, not in any hashes */