*
* An implementation of the DCCP protocol
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ * Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
-#include <linux/config.h>
#include <linux/dccp.h>
#include <net/snmp.h>
#include <net/sock.h>
#include <net/tcp.h>
#include "ackvec.h"
+/*
+ * DCCP - specific warning and debugging macros.
+ */
+#define DCCP_WARN(fmt, a...) LIMIT_NETDEBUG(KERN_WARNING "%s: " fmt, \
+ __FUNCTION__, ##a)
+#define DCCP_CRIT(fmt, a...) printk(KERN_CRIT fmt " at %s:%d/%s()\n", ##a, \
+ __FILE__, __LINE__, __FUNCTION__)
+#define DCCP_BUG(a...) do { DCCP_CRIT("BUG: " a); dump_stack(); } while(0)
+#define DCCP_BUG_ON(cond) do { if (unlikely((cond) != 0)) \
+ DCCP_BUG("\"%s\" holds (exception!)", \
+ __stringify(cond)); \
+ } while (0)
+
+#define DCCP_PRINTK(enable, fmt, args...) do { if (enable) \
+ printk(fmt, ##args); \
+ } while(0)
+#define DCCP_PR_DEBUG(enable, fmt, a...) DCCP_PRINTK(enable, KERN_DEBUG \
+ "%s: " fmt, __FUNCTION__, ##a)
+
#ifdef CONFIG_IP_DCCP_DEBUG
extern int dccp_debug;
-
-#define dccp_pr_debug(format, a...) \
- do { if (dccp_debug) \
- printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \
- } while (0)
-#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) \
- printk(format, ##a); } while (0)
+#define dccp_pr_debug(format, a...) DCCP_PR_DEBUG(dccp_debug, format, ##a)
+#define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a)
#else
#define dccp_pr_debug(format, a...)
#define dccp_pr_debug_cat(format, a...)
extern struct inet_hashinfo dccp_hashinfo;
extern atomic_t dccp_orphan_count;
-extern int dccp_tw_count;
-extern void dccp_tw_deschedule(struct inet_timewait_sock *tw);
extern void dccp_time_wait(struct sock *sk, int state, int timeo);
-/* FIXME: Right size this */
-#define DCCP_MAX_OPT_LEN 128
-
-#define DCCP_MAX_PACKET_HDR 32
-
-#define MAX_DCCP_HEADER (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER)
+/*
+ * Set safe upper bounds for header and option length. Since Data Offset is 8
+ * bits (RFC 4340, sec. 5.1), the total header length can never be more than
+ * 4 * 255 = 1020 bytes. The largest possible header length is 28 bytes (X=1):
+ * - DCCP-Response with ACK Subheader and 4 bytes of Service code OR
+ * - DCCP-Reset with ACK Subheader and 4 bytes of Reset Code fields
+ * Hence a safe upper bound for the maximum option length is 1020-28 = 992
+ */
+#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(int))
+#define DCCP_MAX_PACKET_HDR 28
+#define DCCP_MAX_OPT_LEN (MAX_DCCP_SPECIFIC_HEADER - DCCP_MAX_PACKET_HDR)
+#define MAX_DCCP_HEADER (MAX_DCCP_SPECIFIC_HEADER + MAX_HEADER)
#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
* state, about 60 seconds */
-/* draft-ietf-dccp-spec-11.txt initial RTO value */
+/* RFC 1122, 4.2.3.1 initial RTO value */
#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
+#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
+
+/* bounds for sampled RTT values from packet exchanges (in usec) */
+#define DCCP_SANE_RTT_MIN 100
+#define DCCP_SANE_RTT_MAX (4 * USEC_PER_SEC)
+
/* Maximal interval between probes for local resources. */
#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))
-#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
+/* sysctl variables for DCCP */
+extern int sysctl_dccp_request_retries;
+extern int sysctl_dccp_retries1;
+extern int sysctl_dccp_retries2;
+extern int sysctl_dccp_feat_sequence_window;
+extern int sysctl_dccp_feat_rx_ccid;
+extern int sysctl_dccp_feat_tx_ccid;
+extern int sysctl_dccp_feat_ack_ratio;
+extern int sysctl_dccp_feat_send_ack_vector;
+extern int sysctl_dccp_feat_send_ndp_count;
+extern int sysctl_dccp_tx_qlen;
+
+/*
+ * 48-bit sequence number arithmetic (signed and unsigned)
+ */
+#define INT48_MIN 0x800000000000LL /* 2^47 */
+#define UINT48_MAX 0xFFFFFFFFFFFFLL /* 2^48 - 1 */
+#define COMPLEMENT48(x) (0x1000000000000LL - (x)) /* 2^48 - x */
+#define TO_SIGNED48(x) (((x) < INT48_MIN)? (x) : -COMPLEMENT48( (x)))
+#define TO_UNSIGNED48(x) (((x) >= 0)? (x) : COMPLEMENT48(-(x)))
+#define ADD48(a, b) (((a) + (b)) & UINT48_MAX)
+#define SUB48(a, b) ADD48((a), COMPLEMENT48(b))
+
+static inline void dccp_set_seqno(u64 *seqno, u64 value)
+{
+ *seqno = value & UINT48_MAX;
+}
+
+static inline void dccp_inc_seqno(u64 *seqno)
+{
+ *seqno = ADD48(*seqno, 1);
+}
+
+/* signed mod-2^48 distance: pos. if seqno1 < seqno2, neg. if seqno1 > seqno2 */
+static inline s64 dccp_delta_seqno(const u64 seqno1, const u64 seqno2)
+{
+ u64 delta = SUB48(seqno2, seqno1);
+
+ return TO_SIGNED48(delta);
+}
/* is seq1 < seq2 ? */
static inline int before48(const u64 seq1, const u64 seq2)
{
- return (s64)((seq1 << 16) - (seq2 << 16)) < 0;
+ return (s64)((seq2 << 16) - (seq1 << 16)) > 0;
}
/* is seq1 > seq2 ? */
-static inline int after48(const u64 seq1, const u64 seq2)
-{
- return (s64)((seq2 << 16) - (seq1 << 16)) < 0;
-}
+#define after48(seq1, seq2) before48(seq2, seq1)
/* is seq2 <= seq1 <= seq3 ? */
static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3)
return after48(seq1, seq2) ? seq1 : seq2;
}
+/* is seq1 next seqno after seq2 */
+static inline int follows48(const u64 seq1, const u64 seq2)
+{
+ return dccp_delta_seqno(seq2, seq1) == 1;
+}
+
enum {
DCCP_MIB_NUM = 0,
DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */
DCCP_MIB_ESTABRESETS, /* EstabResets */
DCCP_MIB_CURRESTAB, /* CurrEstab */
- DCCP_MIB_OUTSEGS, /* OutSegs */
+ DCCP_MIB_OUTSEGS, /* OutSegs */
DCCP_MIB_OUTRSTS,
DCCP_MIB_ABORTONTIMEOUT,
DCCP_MIB_TIMEOUTS,
#define DCCP_ADD_STATS_USER(field, val) \
SNMP_ADD_STATS_USER(dccp_statistics, field, val)
+/*
+ * Checksumming routines
+ */
+static inline unsigned int dccp_csum_coverage(const struct sk_buff *skb)
+{
+ const struct dccp_hdr* dh = dccp_hdr(skb);
+
+ if (dh->dccph_cscov == 0)
+ return skb->len;
+ return (dh->dccph_doff + dh->dccph_cscov - 1) * sizeof(u32);
+}
+
+static inline void dccp_csum_outgoing(struct sk_buff *skb)
+{
+ unsigned int cov = dccp_csum_coverage(skb);
+
+ if (cov >= skb->len)
+ dccp_hdr(skb)->dccph_cscov = 0;
+
+ skb->csum = skb_checksum(skb, 0, (cov > skb->len)? skb->len : cov, 0);
+}
+
+extern void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
+
extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb);
-extern int dccp_send_response(struct sock *sk);
extern void dccp_send_ack(struct sock *sk);
extern void dccp_send_delayed_ack(struct sock *sk);
+extern void dccp_reqsk_send_ack(struct sk_buff *sk, struct request_sock *rsk);
+
extern void dccp_send_sync(struct sock *sk, const u64 seq,
const enum dccp_pkt_type pkt_type);
-extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo);
+extern void dccp_write_xmit(struct sock *sk, int block);
extern void dccp_write_space(struct sock *sk);
extern void dccp_init_xmit_timers(struct sock *sk);
extern void dccp_set_state(struct sock *sk, const int state);
extern void dccp_done(struct sock *sk);
-static inline void dccp_openreq_init(struct request_sock *req,
- struct dccp_sock *dp,
- struct sk_buff *skb)
-{
- /*
- * FIXME: fill in the other req fields from the DCCP options
- * received
- */
- inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
- inet_rsk(req)->acked = 0;
- req->rcv_wnd = 0;
-}
+extern void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb);
extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
const struct dccp_hdr *dh, const unsigned len);
-extern int dccp_init_sock(struct sock *sk);
+extern int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized);
extern int dccp_destroy_sock(struct sock *sk);
extern void dccp_close(struct sock *sk, long timeout);
char __user *optval, int __user *optlen);
extern int dccp_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, int optlen);
+#ifdef CONFIG_COMPAT
+extern int compat_dccp_getsockopt(struct sock *sk,
+ int level, int optname,
+ char __user *optval, int __user *optlen);
+extern int compat_dccp_setsockopt(struct sock *sk,
+ int level, int optname,
+ char __user *optval, int optlen);
+#endif
extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t size);
extern int inet_dccp_listen(struct socket *sock, int backlog);
extern unsigned int dccp_poll(struct file *file, struct socket *sock,
poll_table *wait);
-extern void dccp_v4_send_check(struct sock *sk, int len,
- struct sk_buff *skb);
extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len);
-extern int dccp_v4_checksum(const struct sk_buff *skb,
- const __be32 saddr, const __be32 daddr);
-
extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code);
extern void dccp_send_close(struct sock *sk, const int active);
extern int dccp_invalid_packet(struct sk_buff *skb);
+extern u32 dccp_sample_rtt(struct sock *sk, struct timeval *t_recv,
+ struct timeval *t_history);
static inline int dccp_bad_service_code(const struct sock *sk,
const __be32 service)
return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST;
}
-#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1)
-#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2)
-
-static inline void dccp_set_seqno(u64 *seqno, u64 value)
-{
- if (value > DCCP_MAX_SEQNO)
- value -= DCCP_MAX_SEQNO + 1;
- *seqno = value;
-}
-
-static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2)
-{
- return ((seqno2 << 16) - (seqno1 << 16)) >> 16;
-}
-
-static inline void dccp_inc_seqno(u64 *seqno)
-{
- if (++*seqno > DCCP_MAX_SEQNO)
- *seqno = 0;
-}
+#define DCCP_PKT_WITHOUT_ACK_SEQ (UINT48_MAX << 2)
static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss)
{
static inline void dccp_update_gsr(struct sock *sk, u64 seq)
{
struct dccp_sock *dp = dccp_sk(sk);
+ const struct dccp_minisock *dmsk = dccp_msk(sk);
dp->dccps_gsr = seq;
dccp_set_seqno(&dp->dccps_swl,
- (dp->dccps_gsr + 1 -
- (dp->dccps_options.dccpo_sequence_window / 4)));
+ dp->dccps_gsr + 1 - (dmsk->dccpms_sequence_window / 4));
dccp_set_seqno(&dp->dccps_swh,
- (dp->dccps_gsr +
- (3 * dp->dccps_options.dccpo_sequence_window) / 4));
+ dp->dccps_gsr + (3 * dmsk->dccpms_sequence_window) / 4);
}
static inline void dccp_update_gss(struct sock *sk, u64 seq)
dp->dccps_awh = dp->dccps_gss = seq;
dccp_set_seqno(&dp->dccps_awl,
(dp->dccps_gss -
- dp->dccps_options.dccpo_sequence_window + 1));
+ dccp_msk(sk)->dccpms_sequence_window + 1));
}
-
+
static inline int dccp_ack_pending(const struct sock *sk)
{
const struct dccp_sock *dp = dccp_sk(sk);
return dp->dccps_timestamp_echo != 0 ||
#ifdef CONFIG_IP_DCCP_ACKVEC
- (dp->dccps_options.dccpo_send_ack_vector &&
+ (dccp_msk(sk)->dccpms_send_ack_vector &&
dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
#endif
inet_csk_ack_scheduled(sk);
}
-extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb);
-extern void dccp_insert_option_elapsed_time(struct sock *sk,
+extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
+extern int dccp_insert_option_elapsed_time(struct sock *sk,
struct sk_buff *skb,
u32 elapsed_time);
-extern void dccp_insert_option_timestamp(struct sock *sk,
+extern int dccp_insert_option_timestamp(struct sock *sk,
struct sk_buff *skb);
-extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
+extern int dccp_insert_option(struct sock *sk, struct sk_buff *skb,
unsigned char option,
const void *value, unsigned char len);
-extern struct socket *dccp_ctl_socket;
-
extern void dccp_timestamp(const struct sock *sk, struct timeval *tv);
static inline suseconds_t timeval_usecs(const struct timeval *tv)
tv->tv_sec--;
tv->tv_usec += USEC_PER_SEC;
}
+ DCCP_BUG_ON(tv->tv_sec < 0);
}
#ifdef CONFIG_SYSCTL