netfilter: netns nf_conntrack: per-netns net.netfilter.nf_conntrack_log_invalid sysctl
[safe/jmp/linux-2.6] / net / dccp / ipv4.c
index c982ad8..e3dfdda 100644 (file)
 #include "feat.h"
 
 /*
- * This is the global socket data structure used for responding to
+ * The per-net dccp.v4_ctl_sk socket is used for responding to
  * the Out-of-the-blue (OOTB) packets. A control sock will be created
  * for this socket at the initialization time.
  */
-static struct socket *dccp_v4_ctl_socket;
-
-static int dccp_v4_get_port(struct sock *sk, const unsigned short snum)
-{
-       return inet_csk_get_port(&dccp_hashinfo, sk, snum,
-                                inet_csk_bind_conflict);
-}
 
 int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
@@ -203,8 +196,8 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
 static void dccp_v4_err(struct sk_buff *skb, u32 info)
 {
        const struct iphdr *iph = (struct iphdr *)skb->data;
-       const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data +
-                                                       (iph->ihl << 2));
+       const u8 offset = iph->ihl << 2;
+       const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
        struct dccp_sock *dp;
        struct inet_sock *inet;
        const int type = icmp_hdr(skb)->type;
@@ -212,16 +205,19 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
        struct sock *sk;
        __u64 seq;
        int err;
+       struct net *net = dev_net(skb->dev);
 
-       if (skb->len < (iph->ihl << 2) + 8) {
-               ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+       if (skb->len < offset + sizeof(*dh) ||
+           skb->len < offset + __dccp_basic_hdr_len(dh)) {
+               ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
                return;
        }
 
-       sk = inet_lookup(&init_net, &dccp_hashinfo, iph->daddr, dh->dccph_dport,
-                        iph->saddr, dh->dccph_sport, inet_iif(skb));
+       sk = inet_lookup(net, &dccp_hashinfo,
+                       iph->daddr, dh->dccph_dport,
+                       iph->saddr, dh->dccph_sport, inet_iif(skb));
        if (sk == NULL) {
-               ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+               ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
                return;
        }
 
@@ -235,7 +231,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
         * servers this needs to be solved differently.
         */
        if (sock_owned_by_user(sk))
-               NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
+               NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
 
        if (sk->sk_state == DCCP_CLOSED)
                goto out;
@@ -243,8 +239,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
        dp = dccp_sk(sk);
        seq = dccp_hdr_seq(dh);
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) &&
-           !between48(seq, dp->dccps_swl, dp->dccps_swh)) {
-               NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+           !between48(seq, dp->dccps_awl, dp->dccps_awh)) {
+               NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }
 
@@ -288,10 +284,10 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
                 * ICMPs are not backlogged, hence we cannot get an established
                 * socket here.
                 */
-               BUG_TRAP(!req->sk);
+               WARN_ON(req->sk);
 
                if (seq != dccp_rsk(req)->dreq_iss) {
-                       NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+                       NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
                        goto out;
                }
                /*
@@ -408,15 +404,15 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
 
        dccp_sync_mss(newsk, dst_mtu(dst));
 
-       __inet_hash_nolisten(&dccp_hashinfo, newsk);
-       __inet_inherit_port(&dccp_hashinfo, sk, newsk);
+       __inet_hash_nolisten(newsk);
+       __inet_inherit_port(sk, newsk);
 
        return newsk;
 
 exit_overflow:
-       NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
+       NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
 exit:
-       NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
+       NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
        dst_release(dst);
        return NULL;
 }
@@ -436,7 +432,7 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
        if (req != NULL)
                return dccp_check_req(sk, skb, req, prev);
 
-       nsk = inet_lookup_established(&init_net, &dccp_hashinfo,
+       nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo,
                                      iph->saddr, dh->dccph_sport,
                                      iph->daddr, dh->dccph_dport,
                                      inet_iif(skb));
@@ -452,11 +448,11 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
        return sk;
 }
 
-static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
+static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
                                           struct sk_buff *skb)
 {
        struct rtable *rt;
-       struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif,
+       struct flowi fl = { .oif = skb->rtable->rt_iif,
                            .nl_u = { .ip4_u =
                                      { .daddr = ip_hdr(skb)->saddr,
                                        .saddr = ip_hdr(skb)->daddr,
@@ -469,23 +465,22 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
                          };
 
        security_skb_classify_flow(skb, &fl);
-       if (ip_route_output_flow(&init_net, &rt, &fl, sk, 0)) {
-               IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+       if (ip_route_output_flow(net, &rt, &fl, sk, 0)) {
+               IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
                return NULL;
        }
 
        return &rt->u.dst;
 }
 
-static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
-                                struct dst_entry *dst)
+static int dccp_v4_send_response(struct sock *sk, struct request_sock *req)
 {
        int err = -1;
        struct sk_buff *skb;
+       struct dst_entry *dst;
 
-       /* First, grab a route. */
-
-       if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
+       dst = inet_csk_route_req(sk, req);
+       if (dst == NULL)
                goto out;
 
        skb = dccp_make_response(sk, dst, req);
@@ -495,7 +490,6 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
 
                dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->loc_addr,
                                                              ireq->rmt_addr);
-               memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
                err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
                                            ireq->rmt_addr,
                                            ireq->opt);
@@ -513,19 +507,21 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
        const struct iphdr *rxiph;
        struct sk_buff *skb;
        struct dst_entry *dst;
+       struct net *net = dev_net(rxskb->dst->dev);
+       struct sock *ctl_sk = net->dccp.v4_ctl_sk;
 
        /* Never send a reset in response to a reset. */
        if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET)
                return;
 
-       if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
+       if (rxskb->rtable->rt_type != RTN_LOCAL)
                return;
 
-       dst = dccp_v4_route_skb(dccp_v4_ctl_socket->sk, rxskb);
+       dst = dccp_v4_route_skb(net, ctl_sk, rxskb);
        if (dst == NULL)
                return;
 
-       skb = dccp_ctl_make_reset(dccp_v4_ctl_socket, rxskb);
+       skb = dccp_ctl_make_reset(ctl_sk, rxskb);
        if (skb == NULL)
                goto out;
 
@@ -534,10 +530,10 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
                                                                 rxiph->daddr);
        skb->dst = dst_clone(dst);
 
-       bh_lock_sock(dccp_v4_ctl_socket->sk);
-       err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk,
+       bh_lock_sock(ctl_sk);
+       err = ip_build_and_send_pkt(skb, ctl_sk,
                                    rxiph->daddr, rxiph->saddr, NULL);
-       bh_unlock_sock(dccp_v4_ctl_socket->sk);
+       bh_unlock_sock(ctl_sk);
 
        if (net_xmit_eval(err) == 0) {
                DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
@@ -570,8 +566,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
        struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
 
        /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */
-       if (((struct rtable *)skb->dst)->rt_flags &
-           (RTCF_BROADCAST | RTCF_MULTICAST))
+       if (skb->rtable->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
                return 0;       /* discard, don't send a reset here */
 
        if (dccp_bad_service_code(sk, service)) {
@@ -596,7 +591,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
        if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
                goto drop;
 
-       req = reqsk_alloc(&dccp_request_sock_ops);
+       req = inet_reqsk_alloc(&dccp_request_sock_ops);
        if (req == NULL)
                goto drop;
 
@@ -612,7 +607,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
        ireq = inet_rsk(req);
        ireq->loc_addr = ip_hdr(skb)->daddr;
        ireq->rmt_addr = ip_hdr(skb)->saddr;
-       ireq->opt       = NULL;
 
        /*
         * Step 3: Process LISTEN state
@@ -626,7 +620,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
        dreq->dreq_iss     = dccp_v4_init_sequence(skb);
        dreq->dreq_service = service;
 
-       if (dccp_v4_send_response(sk, req, NULL))
+       if (dccp_v4_send_response(sk, req))
                goto drop_and_free;
 
        inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
@@ -746,8 +740,8 @@ int dccp_invalid_packet(struct sk_buff *skb)
         * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet
         * has short sequence numbers), drop packet and return
         */
-       if (dh->dccph_type >= DCCP_PKT_DATA    &&
-           dh->dccph_type <= DCCP_PKT_DATAACK && dh->dccph_x == 0)  {
+       if ((dh->dccph_type < DCCP_PKT_DATA    ||
+           dh->dccph_type > DCCP_PKT_DATAACK) && dh->dccph_x == 0)  {
                DCCP_WARN("P.type (%s) not Data || [Data]Ack, while P.X == 0\n",
                          dccp_packet_name(dh->dccph_type));
                return 1;
@@ -817,9 +811,8 @@ static int dccp_v4_rcv(struct sk_buff *skb)
 
        /* Step 2:
         *      Look up flow ID in table and get corresponding socket */
-       sk = __inet_lookup(&init_net, &dccp_hashinfo,
-                          iph->saddr, dh->dccph_sport,
-                          iph->daddr, dh->dccph_dport, inet_iif(skb));
+       sk = __inet_lookup_skb(&dccp_hashinfo, skb,
+                              dh->dccph_sport, dh->dccph_dport);
        /*
         * Step 2:
         *      If no socket ...
@@ -898,6 +891,7 @@ static struct inet_connection_sock_af_ops dccp_ipv4_af_ops = {
        .getsockopt        = ip_getsockopt,
        .addr2sockaddr     = inet_csk_addr2sockaddr,
        .sockaddr_len      = sizeof(struct sockaddr_in),
+       .bind_conflict     = inet_csk_bind_conflict,
 #ifdef CONFIG_COMPAT
        .compat_setsockopt = compat_ip_setsockopt,
        .compat_getsockopt = compat_ip_getsockopt,
@@ -922,8 +916,6 @@ static struct timewait_sock_ops dccp_timewait_sock_ops = {
        .twsk_obj_size  = sizeof(struct inet_timewait_sock),
 };
 
-DEFINE_PROTO_INUSE(dccp_v4)
-
 static struct proto dccp_v4_prot = {
        .name                   = "DCCP",
        .owner                  = THIS_MODULE,
@@ -937,10 +929,10 @@ static struct proto dccp_v4_prot = {
        .sendmsg                = dccp_sendmsg,
        .recvmsg                = dccp_recvmsg,
        .backlog_rcv            = dccp_v4_do_rcv,
-       .hash                   = dccp_hash,
-       .unhash                 = dccp_unhash,
+       .hash                   = inet_hash,
+       .unhash                 = inet_unhash,
        .accept                 = inet_csk_accept,
-       .get_port               = dccp_v4_get_port,
+       .get_port               = inet_csk_get_port,
        .shutdown               = dccp_shutdown,
        .destroy                = dccp_destroy_sock,
        .orphan_count           = &dccp_orphan_count,
@@ -948,17 +940,18 @@ static struct proto dccp_v4_prot = {
        .obj_size               = sizeof(struct dccp_sock),
        .rsk_prot               = &dccp_request_sock_ops,
        .twsk_prot              = &dccp_timewait_sock_ops,
+       .h.hashinfo             = &dccp_hashinfo,
 #ifdef CONFIG_COMPAT
        .compat_setsockopt      = compat_dccp_setsockopt,
        .compat_getsockopt      = compat_dccp_getsockopt,
 #endif
-       REF_PROTO_INUSE(dccp_v4)
 };
 
 static struct net_protocol dccp_v4_protocol = {
        .handler        = dccp_v4_rcv,
        .err_handler    = dccp_v4_err,
        .no_policy      = 1,
+       .netns_ok       = 1,
 };
 
 static const struct proto_ops inet_dccp_ops = {
@@ -998,6 +991,25 @@ static struct inet_protosw dccp_v4_protosw = {
        .flags          = INET_PROTOSW_ICSK,
 };
 
+static int dccp_v4_init_net(struct net *net)
+{
+       int err;
+
+       err = inet_ctl_sock_create(&net->dccp.v4_ctl_sk, PF_INET,
+                                  SOCK_DCCP, IPPROTO_DCCP, net);
+       return err;
+}
+
+static void dccp_v4_exit_net(struct net *net)
+{
+       inet_ctl_sock_destroy(net->dccp.v4_ctl_sk);
+}
+
+static struct pernet_operations dccp_v4_ops = {
+       .init   = dccp_v4_init_net,
+       .exit   = dccp_v4_exit_net,
+};
+
 static int __init dccp_v4_init(void)
 {
        int err = proto_register(&dccp_v4_prot, 1);
@@ -1011,13 +1023,12 @@ static int __init dccp_v4_init(void)
 
        inet_register_protosw(&dccp_v4_protosw);
 
-       err = inet_csk_ctl_sock_create(&dccp_v4_ctl_socket, PF_INET,
-                                      SOCK_DCCP, IPPROTO_DCCP);
+       err = register_pernet_subsys(&dccp_v4_ops);
        if (err)
-               goto out_unregister_protosw;
+               goto out_destroy_ctl_sock;
 out:
        return err;
-out_unregister_protosw:
+out_destroy_ctl_sock:
        inet_unregister_protosw(&dccp_v4_protosw);
        inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP);
 out_proto_unregister:
@@ -1027,6 +1038,7 @@ out_proto_unregister:
 
 static void __exit dccp_v4_exit(void)
 {
+       unregister_pernet_subsys(&dccp_v4_ops);
        inet_unregister_protosw(&dccp_v4_protosw);
        inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP);
        proto_unregister(&dccp_v4_prot);