X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fdccp%2Fipv4.c;h=d9b11ef8694c1732b609d21782141eca71dcfcce;hb=a939406fda8ddc7de69ee9186356d09dc6daaa2c;hp=0a5d68dbb41879a5a55f6ebe1a85702fb13769d9;hpb=72a3effaf633bcae9034b7e176bdbd78d64a71db;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 0a5d68d..d9b11ef 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -32,17 +33,10 @@ #include "feat.h" /* - * This is the global socket data structure used for responding to + * The per-net dccp.v4_ctl_sk socket is used for responding to * the Out-of-the-blue (OOTB) packets. A control sock will be created * for this socket at the initialization time. */ -static struct socket *dccp_v4_ctl_socket; - -static int dccp_v4_get_port(struct sock *sk, const unsigned short snum) -{ - return inet_csk_get_port(&dccp_hashinfo, sk, snum, - inet_csk_bind_conflict); -} int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -69,10 +63,10 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) nexthop = inet->opt->faddr; } - tmp = ip_route_connect(&rt, nexthop, inet->saddr, + tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_DCCP, - inet->sport, usin->sin_port, sk); + inet->inet_sport, usin->sin_port, sk, 1); if (tmp < 0) return tmp; @@ -84,12 +78,12 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (inet->opt == NULL || !inet->opt->srr) daddr = rt->rt_dst; - if (inet->saddr == 0) - inet->saddr = rt->rt_src; - inet->rcv_saddr = inet->saddr; + if (inet->inet_saddr == 0) + inet->inet_saddr = rt->rt_src; + inet->inet_rcv_saddr = inet->inet_saddr; - inet->dport = usin->sin_port; - inet->daddr = daddr; + inet->inet_dport = usin->sin_port; + inet->inet_daddr = daddr; inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet->opt != NULL) @@ -105,22 +99,19 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (err != 0) goto failure; - err = ip_route_newports(&rt, IPPROTO_DCCP, inet->sport, inet->dport, - sk); + err = ip_route_newports(&rt, IPPROTO_DCCP, inet->inet_sport, + inet->inet_dport, sk); if (err != 0) goto failure; /* OK, now commit destination to socket. */ sk_setup_caps(sk, &rt->u.dst); - dp->dccps_gar = - dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, - inet->daddr, - inet->sport, - usin->sin_port); - dccp_update_gss(sk, dp->dccps_iss); - - inet->id = dp->dccps_iss ^ jiffies; + dp->dccps_iss = secure_dccp_sequence_number(inet->inet_saddr, + inet->inet_daddr, + inet->inet_sport, + inet->inet_dport); + inet->inet_id = dp->dccps_iss ^ jiffies; err = dccp_connect(sk); rt = NULL; @@ -135,7 +126,7 @@ failure: dccp_set_state(sk, DCCP_CLOSED); ip_rt_put(rt); sk->sk_route_caps = 0; - inet->dport = 0; + inet->inet_dport = 0; goto out; } @@ -162,7 +153,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, /* We don't check in the destentry if pmtu discovery is forbidden * on this route. We just assume that no packet_to_big packets * are send back when pmtu discovery is not active. - * There is a small race when the user changes this flag in the + * There is a small race when the user changes this flag in the * route, but I think that's acceptable. */ if ((dst = __sk_dst_check(sk, 0)) == NULL) @@ -193,86 +184,6 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, } /* else let the usual retransmit timer handle it */ } -static void dccp_v4_reqsk_send_ack(struct sk_buff *rxskb, - struct request_sock *req) -{ - int err; - struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; - const u32 dccp_hdr_ack_len = sizeof(struct dccp_hdr) + - sizeof(struct dccp_hdr_ext) + - sizeof(struct dccp_hdr_ack_bits); - struct sk_buff *skb; - - if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) - return; - - skb = alloc_skb(dccp_v4_ctl_socket->sk->sk_prot->max_header, GFP_ATOMIC); - if (skb == NULL) - return; - - /* Reserve space for headers. */ - skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header); - - skb->dst = dst_clone(rxskb->dst); - - skb->h.raw = skb_push(skb, dccp_hdr_ack_len); - dh = dccp_hdr(skb); - memset(dh, 0, dccp_hdr_ack_len); - - /* Build DCCP header and checksum it. */ - dh->dccph_type = DCCP_PKT_ACK; - dh->dccph_sport = rxdh->dccph_dport; - dh->dccph_dport = rxdh->dccph_sport; - dh->dccph_doff = dccp_hdr_ack_len / 4; - dh->dccph_x = 1; - - dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), - DCCP_SKB_CB(rxskb)->dccpd_seq); - - bh_lock_sock(dccp_v4_ctl_socket->sk); - err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk, - rxskb->nh.iph->daddr, - rxskb->nh.iph->saddr, NULL); - bh_unlock_sock(dccp_v4_ctl_socket->sk); - - if (err == NET_XMIT_CN || err == 0) { - DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); - DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); - } -} - -static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, - struct dst_entry *dst) -{ - int err = -1; - struct sk_buff *skb; - - /* First, grab a route. */ - - if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) - goto out; - - skb = dccp_make_response(sk, dst, req); - if (skb != NULL) { - const struct inet_request_sock *ireq = inet_rsk(req); - struct dccp_hdr *dh = dccp_hdr(skb); - - dh->dccph_checksum = dccp_v4_checksum(skb, ireq->loc_addr, - ireq->rmt_addr); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, - ireq->rmt_addr, - ireq->opt); - if (err == NET_XMIT_CN) - err = 0; - } - -out: - dst_release(dst); - return err; -} - /* * This routine is called by the ICMP module when it gets some sort of error * condition. If err < 0 then the socket should be closed and the error @@ -288,25 +199,28 @@ out: static void dccp_v4_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (struct iphdr *)skb->data; - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + - (iph->ihl << 2)); + const u8 offset = iph->ihl << 2; + const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); struct dccp_sock *dp; struct inet_sock *inet; - const int type = skb->h.icmph->type; - const int code = skb->h.icmph->code; + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; struct sock *sk; __u64 seq; int err; + struct net *net = dev_net(skb->dev); - if (skb->len < (iph->ihl << 2) + 8) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + if (skb->len < offset + sizeof(*dh) || + skb->len < offset + __dccp_basic_hdr_len(dh)) { + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } - sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport, - iph->saddr, dh->dccph_sport, inet_iif(skb)); + sk = inet_lookup(net, &dccp_hashinfo, + iph->daddr, dh->dccph_dport, + iph->saddr, dh->dccph_sport, inet_iif(skb)); if (sk == NULL) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } @@ -320,16 +234,16 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) * servers this needs to be solved differently. */ if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; dp = dccp_sk(sk); - seq = dccp_hdr_seq(skb); - if (sk->sk_state != DCCP_LISTEN && - !between48(seq, dp->dccps_swl, dp->dccps_swh)) { - NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS); + seq = dccp_hdr_seq(dh); + if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && + !between48(seq, dp->dccps_awl, dp->dccps_awh)) { + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -373,10 +287,10 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) * ICMPs are not backlogged, hence we cannot get an established * socket here. */ - BUG_TRAP(!req->sk); + WARN_ON(req->sk); if (seq != dccp_rsk(req)->dreq_iss) { - NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } /* @@ -429,115 +343,33 @@ out: sock_put(sk); } -/* This routine computes an IPv4 DCCP checksum. */ -void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) +static inline __sum16 dccp_v4_csum_finish(struct sk_buff *skb, + __be32 src, __be32 dst) +{ + return csum_tcpudp_magic(src, dst, skb->len, IPPROTO_DCCP, skb->csum); +} + +void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb) { const struct inet_sock *inet = inet_sk(sk); struct dccp_hdr *dh = dccp_hdr(skb); - dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, inet->daddr); + dccp_csum_outgoing(skb); + dh->dccph_checksum = dccp_v4_csum_finish(skb, + inet->inet_saddr, + inet->inet_daddr); } EXPORT_SYMBOL_GPL(dccp_v4_send_check); -static inline u64 dccp_v4_init_sequence(const struct sock *sk, - const struct sk_buff *skb) +static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb) { - return secure_dccp_sequence_number(skb->nh.iph->daddr, - skb->nh.iph->saddr, + return secure_dccp_sequence_number(ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr, dccp_hdr(skb)->dccph_dport, dccp_hdr(skb)->dccph_sport); } -static struct request_sock_ops dccp_request_sock_ops; - -int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) -{ - struct inet_request_sock *ireq; - struct dccp_sock dp; - struct request_sock *req; - struct dccp_request_sock *dreq; - const __be32 saddr = skb->nh.iph->saddr; - const __be32 daddr = skb->nh.iph->daddr; - const __be32 service = dccp_hdr_request(skb)->dccph_req_service; - struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; - - /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ - if (((struct rtable *)skb->dst)->rt_flags & - (RTCF_BROADCAST | RTCF_MULTICAST)) { - reset_code = DCCP_RESET_CODE_NO_CONNECTION; - goto drop; - } - - if (dccp_bad_service_code(sk, service)) { - reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; - goto drop; - } - /* - * TW buckets are converted to open requests without - * limitations, they conserve resources and peer is - * evidently real one. - */ - if (inet_csk_reqsk_queue_is_full(sk)) - goto drop; - - /* - * Accept backlog is full. If we have already queued enough - * of warm entries in syn queue, drop request. It is better than - * clogging syn queue with openreqs with exponentially increasing - * timeout. - */ - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) - goto drop; - - req = reqsk_alloc(&dccp_request_sock_ops); - if (req == NULL) - goto drop; - - if (dccp_parse_options(sk, skb)) - goto drop_and_free; - - dccp_openreq_init(req, &dp, skb); - - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; - - ireq = inet_rsk(req); - ireq->loc_addr = daddr; - ireq->rmt_addr = saddr; - req->rcv_wnd = dccp_feat_default_sequence_window; - ireq->opt = NULL; - - /* - * Step 3: Process LISTEN state - * - * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie - * - * In fact we defer setting S.GSR, S.SWL, S.SWH to - * dccp_create_openreq_child. - */ - dreq = dccp_rsk(req); - dreq->dreq_isr = dcb->dccpd_seq; - dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); - dreq->dreq_service = service; - - if (dccp_v4_send_response(sk, req, NULL)) - goto drop_and_free; - - inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); - return 0; - -drop_and_free: - reqsk_free(req); -drop: - DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); - dcb->dccpd_reset_code = reset_code; - return -1; -} - -EXPORT_SYMBOL_GPL(dccp_v4_conn_request); - /* * The three way handshake has completed - we got a valid ACK or DATAACK - * now create the new socket. @@ -550,7 +382,6 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, { struct inet_request_sock *ireq; struct inet_sock *newinet; - struct dccp_sock *newdp; struct sock *newsk; if (sk_acceptq_is_full(sk)) @@ -565,29 +396,28 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, sk_setup_caps(newsk, dst); - newdp = dccp_sk(newsk); newinet = inet_sk(newsk); ireq = inet_rsk(req); - newinet->daddr = ireq->rmt_addr; - newinet->rcv_saddr = ireq->loc_addr; - newinet->saddr = ireq->loc_addr; + newinet->inet_daddr = ireq->rmt_addr; + newinet->inet_rcv_saddr = ireq->loc_addr; + newinet->inet_saddr = ireq->loc_addr; newinet->opt = ireq->opt; ireq->opt = NULL; newinet->mc_index = inet_iif(skb); - newinet->mc_ttl = skb->nh.iph->ttl; - newinet->id = jiffies; + newinet->mc_ttl = ip_hdr(skb)->ttl; + newinet->inet_id = jiffies; dccp_sync_mss(newsk, dst_mtu(dst)); - __inet_hash(&dccp_hashinfo, newsk, 0); - __inet_inherit_port(&dccp_hashinfo, sk, newsk); + __inet_hash_nolisten(newsk, NULL); + __inet_inherit_port(sk, newsk); return newsk; exit_overflow: - NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); exit: - NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); dst_release(dst); return NULL; } @@ -597,7 +427,7 @@ EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock); static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) { const struct dccp_hdr *dh = dccp_hdr(skb); - const struct iphdr *iph = skb->nh.iph; + const struct iphdr *iph = ip_hdr(skb); struct sock *nsk; struct request_sock **prev; /* Find possible connection requests. */ @@ -607,7 +437,7 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) if (req != NULL) return dccp_check_req(sk, skb, req, prev); - nsk = inet_lookup_established(&dccp_hashinfo, + nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo, iph->saddr, dh->dccph_sport, iph->daddr, dh->dccph_dport, inet_iif(skb)); @@ -623,135 +453,95 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) return sk; } -int dccp_v4_checksum(const struct sk_buff *skb, const __be32 saddr, - const __be32 daddr) -{ - const struct dccp_hdr* dh = dccp_hdr(skb); - int checksum_len; - u32 tmp; - - if (dh->dccph_cscov == 0) - checksum_len = skb->len; - else { - checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); - checksum_len = checksum_len < skb->len ? checksum_len : - skb->len; - } - - tmp = csum_partial((unsigned char *)dh, checksum_len, 0); - return csum_tcpudp_magic(saddr, daddr, checksum_len, - IPPROTO_DCCP, tmp); -} - -EXPORT_SYMBOL_GPL(dccp_v4_checksum); - -static int dccp_v4_verify_checksum(struct sk_buff *skb, - const __be32 saddr, const __be32 daddr) -{ - struct dccp_hdr *dh = dccp_hdr(skb); - int checksum_len; - u32 tmp; - - if (dh->dccph_cscov == 0) - checksum_len = skb->len; - else { - checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); - checksum_len = checksum_len < skb->len ? checksum_len : - skb->len; - } - tmp = csum_partial((unsigned char *)dh, checksum_len, 0); - return csum_tcpudp_magic(saddr, daddr, checksum_len, - IPPROTO_DCCP, tmp) == 0 ? 0 : -1; -} - -static struct dst_entry* dccp_v4_route_skb(struct sock *sk, +static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, struct sk_buff *skb) { struct rtable *rt; - struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif, + struct flowi fl = { .oif = skb_rtable(skb)->rt_iif, .nl_u = { .ip4_u = - { .daddr = skb->nh.iph->saddr, - .saddr = skb->nh.iph->daddr, + { .daddr = ip_hdr(skb)->saddr, + .saddr = ip_hdr(skb)->daddr, .tos = RT_CONN_FLAGS(sk) } }, .proto = sk->sk_protocol, .uli_u = { .ports = { .sport = dccp_hdr(skb)->dccph_dport, .dport = dccp_hdr(skb)->dccph_sport } - } + } }; security_skb_classify_flow(skb, &fl); - if (ip_route_output_flow(&rt, &fl, sk, 0)) { - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + if (ip_route_output_flow(net, &rt, &fl, sk, 0)) { + IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } return &rt->u.dst; } -static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) +static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, + struct request_values *rv_unused) +{ + int err = -1; + struct sk_buff *skb; + struct dst_entry *dst; + + dst = inet_csk_route_req(sk, req); + if (dst == NULL) + goto out; + + skb = dccp_make_response(sk, dst, req); + if (skb != NULL) { + const struct inet_request_sock *ireq = inet_rsk(req); + struct dccp_hdr *dh = dccp_hdr(skb); + + dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->loc_addr, + ireq->rmt_addr); + err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, + ireq->rmt_addr, + ireq->opt); + err = net_xmit_eval(err); + } + +out: + dst_release(dst); + return err; +} + +static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) { int err; - struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; - const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) + - sizeof(struct dccp_hdr_ext) + - sizeof(struct dccp_hdr_reset); + const struct iphdr *rxiph; struct sk_buff *skb; struct dst_entry *dst; - u64 seqno; + struct net *net = dev_net(skb_dst(rxskb)->dev); + struct sock *ctl_sk = net->dccp.v4_ctl_sk; /* Never send a reset in response to a reset. */ - if (rxdh->dccph_type == DCCP_PKT_RESET) + if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET) return; - if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) + if (skb_rtable(rxskb)->rt_type != RTN_LOCAL) return; - dst = dccp_v4_route_skb(dccp_v4_ctl_socket->sk, rxskb); + dst = dccp_v4_route_skb(net, ctl_sk, rxskb); if (dst == NULL) return; - skb = alloc_skb(dccp_v4_ctl_socket->sk->sk_prot->max_header, - GFP_ATOMIC); + skb = dccp_ctl_make_reset(ctl_sk, rxskb); if (skb == NULL) goto out; - /* Reserve space for headers. */ - skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header); - skb->dst = dst_clone(dst); + rxiph = ip_hdr(rxskb); + dccp_hdr(skb)->dccph_checksum = dccp_v4_csum_finish(skb, rxiph->saddr, + rxiph->daddr); + skb_dst_set(skb, dst_clone(dst)); - skb->h.raw = skb_push(skb, dccp_hdr_reset_len); - dh = dccp_hdr(skb); - memset(dh, 0, dccp_hdr_reset_len); - - /* Build DCCP header and checksum it. */ - dh->dccph_type = DCCP_PKT_RESET; - dh->dccph_sport = rxdh->dccph_dport; - dh->dccph_dport = rxdh->dccph_sport; - dh->dccph_doff = dccp_hdr_reset_len / 4; - dh->dccph_x = 1; - dccp_hdr_reset(skb)->dccph_reset_code = - DCCP_SKB_CB(rxskb)->dccpd_reset_code; - - /* See "8.3.1. Abnormal Termination" in RFC 4340 */ - seqno = 0; - if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) - dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1); - - dccp_hdr_set_seq(dh, seqno); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), - DCCP_SKB_CB(rxskb)->dccpd_seq); - - dh->dccph_checksum = dccp_v4_checksum(skb, rxskb->nh.iph->saddr, - rxskb->nh.iph->daddr); - - bh_lock_sock(dccp_v4_ctl_socket->sk); - err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk, - rxskb->nh.iph->daddr, - rxskb->nh.iph->saddr, NULL); - bh_unlock_sock(dccp_v4_ctl_socket->sk); - - if (err == NET_XMIT_CN || err == 0) { + bh_lock_sock(ctl_sk); + err = ip_build_and_send_pkt(skb, ctl_sk, + rxiph->daddr, rxiph->saddr, NULL); + bh_unlock_sock(ctl_sk); + + if (net_xmit_eval(err) == 0) { DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); } @@ -759,6 +549,100 @@ out: dst_release(dst); } +static void dccp_v4_reqsk_destructor(struct request_sock *req) +{ + dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg); + kfree(inet_rsk(req)->opt); +} + +static struct request_sock_ops dccp_request_sock_ops __read_mostly = { + .family = PF_INET, + .obj_size = sizeof(struct dccp_request_sock), + .rtx_syn_ack = dccp_v4_send_response, + .send_ack = dccp_reqsk_send_ack, + .destructor = dccp_v4_reqsk_destructor, + .send_reset = dccp_v4_ctl_send_reset, +}; + +int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) +{ + struct inet_request_sock *ireq; + struct request_sock *req; + struct dccp_request_sock *dreq; + const __be32 service = dccp_hdr_request(skb)->dccph_req_service; + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + + /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ + if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) + return 0; /* discard, don't send a reset here */ + + if (dccp_bad_service_code(sk, service)) { + dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; + goto drop; + } + /* + * TW buckets are converted to open requests without + * limitations, they conserve resources and peer is + * evidently real one. + */ + dcb->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; + if (inet_csk_reqsk_queue_is_full(sk)) + goto drop; + + /* + * Accept backlog is full. If we have already queued enough + * of warm entries in syn queue, drop request. It is better than + * clogging syn queue with openreqs with exponentially increasing + * timeout. + */ + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) + goto drop; + + req = inet_reqsk_alloc(&dccp_request_sock_ops); + if (req == NULL) + goto drop; + + if (dccp_reqsk_init(req, dccp_sk(sk), skb)) + goto drop_and_free; + + dreq = dccp_rsk(req); + if (dccp_parse_options(sk, dreq, skb)) + goto drop_and_free; + + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + + ireq = inet_rsk(req); + ireq->loc_addr = ip_hdr(skb)->daddr; + ireq->rmt_addr = ip_hdr(skb)->saddr; + + /* + * Step 3: Process LISTEN state + * + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie + * + * In fact we defer setting S.GSR, S.SWL, S.SWH to + * dccp_create_openreq_child. + */ + dreq->dreq_isr = dcb->dccpd_seq; + dreq->dreq_iss = dccp_v4_init_sequence(skb); + dreq->dreq_service = service; + + if (dccp_v4_send_response(sk, req, NULL)) + goto drop_and_free; + + inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); + return 0; + +drop_and_free: + reqsk_free(req); +drop: + DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + return -1; +} + +EXPORT_SYMBOL_GPL(dccp_v4_conn_request); + int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) { struct dccp_hdr *dh = dccp_hdr(skb); @@ -771,24 +655,23 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) /* * Step 3: Process LISTEN state - * If S.state == LISTEN, - * If P.type == Request or P contains a valid Init Cookie - * option, - * * Must scan the packet's options to check for an Init - * Cookie. Only the Init Cookie is processed here, - * however; other options are processed in Step 8. This - * scan need only be performed if the endpoint uses Init - * Cookies * - * * Generate a new socket and switch to that socket * - * Set S := new socket for this port pair - * S.state = RESPOND - * Choose S.ISS (initial seqno) or set from Init Cookie - * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie - * Continue with S.state == RESPOND - * * A Response packet will be generated in Step 11 * - * Otherwise, - * Generate Reset(No Connection) unless P.type == Reset - * Drop packet and return + * If P.type == Request or P contains a valid Init Cookie option, + * (* Must scan the packet's options to check for Init + * Cookies. Only Init Cookies are processed here, + * however; other options are processed in Step 8. This + * scan need only be performed if the endpoint uses Init + * Cookies *) + * (* Generate a new socket and switch to that socket *) + * Set S := new socket for this port pair + * S.state = RESPOND + * Choose S.ISS (initial seqno) or set from Init Cookies + * Initialize S.GAR := S.ISS + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies + * Continue with S.state == RESPOND + * (* A Response packet will be generated in Step 11 *) + * Otherwise, + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return * * NOTE: the check for the packet types is done in * dccp_rcv_state_process @@ -811,7 +694,7 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - dccp_v4_ctl_send_reset(skb); + dccp_v4_ctl_send_reset(sk, skb); discard: kfree_skb(skb); return 0; @@ -819,60 +702,74 @@ discard: EXPORT_SYMBOL_GPL(dccp_v4_do_rcv); +/** + * dccp_invalid_packet - check for malformed packets + * Implements RFC 4340, 8.5: Step 1: Check header basics + * Packets that fail these checks are ignored and do not receive Resets. + */ int dccp_invalid_packet(struct sk_buff *skb) { const struct dccp_hdr *dh; + unsigned int cscov; if (skb->pkt_type != PACKET_HOST) return 1; + /* If the packet is shorter than 12 bytes, drop packet and return */ if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: pskb_may_pull failed\n"); + DCCP_WARN("pskb_may_pull failed\n"); return 1; } dh = dccp_hdr(skb); - /* If the packet type is not understood, drop packet and return */ + /* If P.type is not understood, drop packet and return */ if (dh->dccph_type >= DCCP_PKT_INVALID) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: invalid packet type\n"); + DCCP_WARN("invalid packet type\n"); return 1; } /* - * If P.Data Offset is too small for packet type, or too large for - * packet, drop packet and return + * If P.Data Offset is too small for packet type, drop packet and return */ if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) " - "too small 1\n", - dh->dccph_doff); + DCCP_WARN("P.Data Offset(%u) too small\n", dh->dccph_doff); return 1; } - + /* + * If P.Data Offset is too too large for packet, drop packet and return + */ if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) " - "too small 2\n", - dh->dccph_doff); + DCCP_WARN("P.Data Offset(%u) too large\n", dh->dccph_doff); return 1; } - dh = dccp_hdr(skb); - /* * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet * has short sequence numbers), drop packet and return */ - if (dh->dccph_x == 0 && - dh->dccph_type != DCCP_PKT_DATA && - dh->dccph_type != DCCP_PKT_ACK && - dh->dccph_type != DCCP_PKT_DATAACK) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.type (%s) not Data, Ack " - "nor DataAck and P.X == 0\n", - dccp_packet_name(dh->dccph_type)); + if ((dh->dccph_type < DCCP_PKT_DATA || + dh->dccph_type > DCCP_PKT_DATAACK) && dh->dccph_x == 0) { + DCCP_WARN("P.type (%s) not Data || [Data]Ack, while P.X == 0\n", + dccp_packet_name(dh->dccph_type)); + return 1; + } + + /* + * If P.CsCov is too large for the packet size, drop packet and return. + * This must come _before_ checksumming (not as RFC 4340 suggests). + */ + cscov = dccp_csum_coverage(skb); + if (cscov > skb->len) { + DCCP_WARN("P.CsCov %u exceeds packet length %d\n", + dh->dccph_cscov, skb->len); return 1; } + /* If header checksum is incorrect, drop packet and return. + * (This step is completed in the AF-dependent functions.) */ + skb->csum = skb_checksum(skb, 0, cscov, 0); + return 0; } @@ -882,32 +779,31 @@ EXPORT_SYMBOL_GPL(dccp_invalid_packet); static int dccp_v4_rcv(struct sk_buff *skb) { const struct dccp_hdr *dh; + const struct iphdr *iph; struct sock *sk; + int min_cov; - /* Step 1: Check header basics: */ + /* Step 1: Check header basics */ if (dccp_invalid_packet(skb)) goto discard_it; - /* If the header checksum is incorrect, drop packet and return */ - if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr, - skb->nh.iph->daddr) < 0) { - LIMIT_NETDEBUG(KERN_WARNING "%s: incorrect header checksum\n", - __FUNCTION__); + iph = ip_hdr(skb); + /* Step 1: If header checksum is incorrect, drop packet and return */ + if (dccp_v4_csum_finish(skb, iph->saddr, iph->daddr)) { + DCCP_WARN("dropped packet with invalid checksum\n"); goto discard_it; } dh = dccp_hdr(skb); - DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); + DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(dh); DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; - dccp_pr_debug("%8.8s " - "src=%u.%u.%u.%u@%-5d " - "dst=%u.%u.%u.%u@%-5d seq=%llu", + dccp_pr_debug("%8.8s src=%pI4@%-5d dst=%pI4@%-5d seq=%llu", dccp_packet_name(dh->dccph_type), - NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport), - NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport), + &iph->saddr, ntohs(dh->dccph_sport), + &iph->daddr, ntohs(dh->dccph_dport), (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); if (dccp_packet_without_ack(skb)) { @@ -915,23 +811,17 @@ static int dccp_v4_rcv(struct sk_buff *skb) dccp_pr_debug_cat("\n"); } else { DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); - dccp_pr_debug_cat(", ack=%llu\n", - (unsigned long long) + dccp_pr_debug_cat(", ack=%llu\n", (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq); } /* Step 2: - * Look up flow ID in table and get corresponding socket */ - sk = __inet_lookup(&dccp_hashinfo, - skb->nh.iph->saddr, dh->dccph_sport, - skb->nh.iph->daddr, dh->dccph_dport, - inet_iif(skb)); - - /* + * Look up flow ID in table and get corresponding socket */ + sk = __inet_lookup_skb(&dccp_hashinfo, skb, + dh->dccph_sport, dh->dccph_dport); + /* * Step 2: - * If no socket ... - * Generate Reset(No Connection) unless P.type == Reset - * Drop packet and return + * If no socket ... */ if (sk == NULL) { dccp_pr_debug("failed to look up flow ID in table and " @@ -939,54 +829,64 @@ static int dccp_v4_rcv(struct sk_buff *skb) goto no_dccp_socket; } - /* + /* * Step 2: - * ... or S.state == TIMEWAIT, + * ... or S.state == TIMEWAIT, * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ - if (sk->sk_state == DCCP_TIME_WAIT) { - dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: " - "do_time_wait\n"); - goto do_time_wait; + dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n"); + inet_twsk_put(inet_twsk(sk)); + goto no_dccp_socket; + } + + /* + * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage + * o if MinCsCov = 0, only packets with CsCov = 0 are accepted + * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov + */ + min_cov = dccp_sk(sk)->dccps_pcrlen; + if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov)) { + dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n", + dh->dccph_cscov, min_cov); + /* FIXME: "Such packets SHOULD be reported using Data Dropped + * options (Section 11.7) with Drop Code 0, Protocol + * Constraints." */ + goto discard_and_relse; } if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; nf_reset(skb); - return sk_receive_skb(sk, skb); + return sk_receive_skb(sk, skb, 1); no_dccp_socket: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard_it; /* * Step 2: + * If no socket ... * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ if (dh->dccph_type != DCCP_PKT_RESET) { DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; - dccp_v4_ctl_send_reset(skb); + dccp_v4_ctl_send_reset(sk, skb); } discard_it: - /* Discard frame. */ kfree_skb(skb); return 0; discard_and_relse: sock_put(sk); goto discard_it; - -do_time_wait: - inet_twsk_put(inet_twsk(sk)); - goto no_dccp_socket; } -static struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { +static const struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { .queue_xmit = ip_queue_xmit, .send_check = dccp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, @@ -997,6 +897,7 @@ static struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { .getsockopt = ip_getsockopt, .addr2sockaddr = inet_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in), + .bind_conflict = inet_csk_bind_conflict, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ip_setsockopt, .compat_getsockopt = compat_ip_getsockopt, @@ -1017,20 +918,6 @@ static int dccp_v4_init_sock(struct sock *sk) return err; } -static void dccp_v4_reqsk_destructor(struct request_sock *req) -{ - kfree(inet_rsk(req)->opt); -} - -static struct request_sock_ops dccp_request_sock_ops _read_mostly = { - .family = PF_INET, - .obj_size = sizeof(struct dccp_request_sock), - .rtx_syn_ack = dccp_v4_send_response, - .send_ack = dccp_v4_reqsk_send_ack, - .destructor = dccp_v4_reqsk_destructor, - .send_reset = dccp_v4_ctl_send_reset, -}; - static struct timewait_sock_ops dccp_timewait_sock_ops = { .twsk_obj_size = sizeof(struct inet_timewait_sock), }; @@ -1048,27 +935,30 @@ static struct proto dccp_v4_prot = { .sendmsg = dccp_sendmsg, .recvmsg = dccp_recvmsg, .backlog_rcv = dccp_v4_do_rcv, - .hash = dccp_hash, - .unhash = dccp_unhash, + .hash = inet_hash, + .unhash = inet_unhash, .accept = inet_csk_accept, - .get_port = dccp_v4_get_port, + .get_port = inet_csk_get_port, .shutdown = dccp_shutdown, .destroy = dccp_destroy_sock, .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .rsk_prot = &dccp_request_sock_ops, .twsk_prot = &dccp_timewait_sock_ops, + .h.hashinfo = &dccp_hashinfo, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_dccp_setsockopt, .compat_getsockopt = compat_dccp_getsockopt, #endif }; -static struct net_protocol dccp_v4_protocol = { +static const struct net_protocol dccp_v4_protocol = { .handler = dccp_v4_rcv, .err_handler = dccp_v4_err, .no_policy = 1, + .netns_ok = 1, }; static const struct proto_ops inet_dccp_ops = { @@ -1103,11 +993,29 @@ static struct inet_protosw dccp_v4_protosw = { .protocol = IPPROTO_DCCP, .prot = &dccp_v4_prot, .ops = &inet_dccp_ops, - .capability = -1, .no_check = 0, .flags = INET_PROTOSW_ICSK, }; +static int __net_init dccp_v4_init_net(struct net *net) +{ + if (dccp_hashinfo.bhash == NULL) + return -ESOCKTNOSUPPORT; + + return inet_ctl_sock_create(&net->dccp.v4_ctl_sk, PF_INET, + SOCK_DCCP, IPPROTO_DCCP, net); +} + +static void __net_exit dccp_v4_exit_net(struct net *net) +{ + inet_ctl_sock_destroy(net->dccp.v4_ctl_sk); +} + +static struct pernet_operations dccp_v4_ops = { + .init = dccp_v4_init_net, + .exit = dccp_v4_exit_net, +}; + static int __init dccp_v4_init(void) { int err = proto_register(&dccp_v4_prot, 1); @@ -1121,13 +1029,12 @@ static int __init dccp_v4_init(void) inet_register_protosw(&dccp_v4_protosw); - err = inet_csk_ctl_sock_create(&dccp_v4_ctl_socket, PF_INET, - SOCK_DCCP, IPPROTO_DCCP); + err = register_pernet_subsys(&dccp_v4_ops); if (err) - goto out_unregister_protosw; + goto out_destroy_ctl_sock; out: return err; -out_unregister_protosw: +out_destroy_ctl_sock: inet_unregister_protosw(&dccp_v4_protosw); inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP); out_proto_unregister: @@ -1137,6 +1044,7 @@ out_proto_unregister: static void __exit dccp_v4_exit(void) { + unregister_pernet_subsys(&dccp_v4_ops); inet_unregister_protosw(&dccp_v4_protosw); inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP); proto_unregister(&dccp_v4_prot); @@ -1150,8 +1058,8 @@ module_exit(dccp_v4_exit); * values directly, Also cover the case where the protocol is not specified, * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP */ -MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6"); -MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 33, 6); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 0, 6); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arnaldo Carvalho de Melo "); MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");