*/
#include <linux/dccp.h>
+#include <linux/ktime.h>
#include <net/snmp.h>
#include <net/sock.h>
#include <net/tcp.h>
#include "ackvec.h"
+/*
+ * DCCP - specific warning and debugging macros.
+ */
+#define DCCP_WARN(fmt, a...) LIMIT_NETDEBUG(KERN_WARNING "%s: " fmt, \
+ __func__, ##a)
+#define DCCP_CRIT(fmt, a...) printk(KERN_CRIT fmt " at %s:%d/%s()\n", ##a, \
+ __FILE__, __LINE__, __func__)
+#define DCCP_BUG(a...) do { DCCP_CRIT("BUG: " a); dump_stack(); } while(0)
+#define DCCP_BUG_ON(cond) do { if (unlikely((cond) != 0)) \
+ DCCP_BUG("\"%s\" holds (exception!)", \
+ __stringify(cond)); \
+ } while (0)
+
+#define DCCP_PRINTK(enable, fmt, args...) do { if (enable) \
+ printk(fmt, ##args); \
+ } while(0)
+#define DCCP_PR_DEBUG(enable, fmt, a...) DCCP_PRINTK(enable, KERN_DEBUG \
+ "%s: " fmt, __func__, ##a)
+
#ifdef CONFIG_IP_DCCP_DEBUG
extern int dccp_debug;
-
-#define dccp_pr_debug(format, a...) \
- do { if (dccp_debug) \
- printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \
- } while (0)
-#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) \
- printk(format, ##a); } while (0)
+#define dccp_pr_debug(format, a...) DCCP_PR_DEBUG(dccp_debug, format, ##a)
+#define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a)
+#define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a)
#else
#define dccp_pr_debug(format, a...)
#define dccp_pr_debug_cat(format, a...)
+#define dccp_debug(format, a...)
#endif
extern struct inet_hashinfo dccp_hashinfo;
-extern atomic_t dccp_orphan_count;
-extern int dccp_tw_count;
-extern void dccp_tw_deschedule(struct inet_timewait_sock *tw);
+extern struct percpu_counter dccp_orphan_count;
extern void dccp_time_wait(struct sock *sk, int state, int timeo);
-/* FIXME: Right size this */
-#define DCCP_MAX_OPT_LEN 128
-
-#define DCCP_MAX_PACKET_HDR 32
+/*
+ * Set safe upper bounds for header and option length. Since Data Offset is 8
+ * bits (RFC 4340, sec. 5.1), the total header length can never be more than
+ * 4 * 255 = 1020 bytes. The largest possible header length is 28 bytes (X=1):
+ * - DCCP-Response with ACK Subheader and 4 bytes of Service code OR
+ * - DCCP-Reset with ACK Subheader and 4 bytes of Reset Code fields
+ * Hence a safe upper bound for the maximum option length is 1020-28 = 992
+ */
+#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(uint32_t))
+#define DCCP_MAX_PACKET_HDR 28
+#define DCCP_MAX_OPT_LEN (MAX_DCCP_SPECIFIC_HEADER - DCCP_MAX_PACKET_HDR)
+#define MAX_DCCP_HEADER (MAX_DCCP_SPECIFIC_HEADER + MAX_HEADER)
-#define MAX_DCCP_HEADER (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER)
+/* Upper bound for initial feature-negotiation overhead (padded to 32 bits) */
+#define DCCP_FEATNEG_OVERHEAD (32 * sizeof(uint32_t))
#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
* state, about 60 seconds */
/* RFC 1122, 4.2.3.1 initial RTO value */
#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
+/*
+ * The maximum back-off value for retransmissions. This is needed for
+ * - retransmitting client-Requests (sec. 8.1.1),
+ * - retransmitting Close/CloseReq when closing (sec. 8.3),
+ * - feature-negotiation retransmission (sec. 6.6.3),
+ * - Acks in client-PARTOPEN state (sec. 8.1.5).
+ */
+#define DCCP_RTO_MAX ((unsigned)(64 * HZ))
+
+/*
+ * RTT sampling: sanity bounds and fallback RTT value from RFC 4340, section 3.4
+ */
+#define DCCP_SANE_RTT_MIN 100
+#define DCCP_FALLBACK_RTT (USEC_PER_SEC / 5)
+#define DCCP_SANE_RTT_MAX (3 * USEC_PER_SEC)
+
/* Maximal interval between probes for local resources. */
#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))
-#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
+/* sysctl variables for DCCP */
+extern int sysctl_dccp_request_retries;
+extern int sysctl_dccp_retries1;
+extern int sysctl_dccp_retries2;
+extern int sysctl_dccp_tx_qlen;
+extern int sysctl_dccp_sync_ratelimit;
+
+/*
+ * 48-bit sequence number arithmetic (signed and unsigned)
+ */
+#define INT48_MIN 0x800000000000LL /* 2^47 */
+#define UINT48_MAX 0xFFFFFFFFFFFFLL /* 2^48 - 1 */
+#define COMPLEMENT48(x) (0x1000000000000LL - (x)) /* 2^48 - x */
+#define TO_SIGNED48(x) (((x) < INT48_MIN)? (x) : -COMPLEMENT48( (x)))
+#define TO_UNSIGNED48(x) (((x) >= 0)? (x) : COMPLEMENT48(-(x)))
+#define ADD48(a, b) (((a) + (b)) & UINT48_MAX)
+#define SUB48(a, b) ADD48((a), COMPLEMENT48(b))
+
+static inline void dccp_set_seqno(u64 *seqno, u64 value)
+{
+ *seqno = value & UINT48_MAX;
+}
+
+static inline void dccp_inc_seqno(u64 *seqno)
+{
+ *seqno = ADD48(*seqno, 1);
+}
+
+/* signed mod-2^48 distance: pos. if seqno1 < seqno2, neg. if seqno1 > seqno2 */
+static inline s64 dccp_delta_seqno(const u64 seqno1, const u64 seqno2)
+{
+ u64 delta = SUB48(seqno2, seqno1);
+
+ return TO_SIGNED48(delta);
+}
/* is seq1 < seq2 ? */
static inline int before48(const u64 seq1, const u64 seq2)
{
- return (s64)((seq1 << 16) - (seq2 << 16)) < 0;
+ return (s64)((seq2 << 16) - (seq1 << 16)) > 0;
}
/* is seq1 > seq2 ? */
-static inline int after48(const u64 seq1, const u64 seq2)
-{
- return (s64)((seq2 << 16) - (seq1 << 16)) < 0;
-}
+#define after48(seq1, seq2) before48(seq2, seq1)
/* is seq2 <= seq1 <= seq3 ? */
static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3)
return after48(seq1, seq2) ? seq1 : seq2;
}
-/* is seq1 next seqno after seq2 */
-static inline int follows48(const u64 seq1, const u64 seq2)
+/**
+ * dccp_loss_free - Evaluates condition for data loss from RFC 4340, 7.7.1
+ * @s1: start sequence number
+ * @s2: end sequence number
+ * @ndp: NDP count on packet with sequence number @s2
+ * Returns true if the sequence range s1...s2 has no data loss.
+ */
+static inline bool dccp_loss_free(const u64 s1, const u64 s2, const u64 ndp)
{
- int diff = (seq1 & 0xFFFF) - (seq2 & 0xFFFF);
+ s64 delta = dccp_delta_seqno(s1, s2);
- return diff==1;
+ WARN_ON(delta < 0);
+ return (u64)delta <= ndp + 1;
}
enum {
DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */
DCCP_MIB_ESTABRESETS, /* EstabResets */
DCCP_MIB_CURRESTAB, /* CurrEstab */
- DCCP_MIB_OUTSEGS, /* OutSegs */
+ DCCP_MIB_OUTSEGS, /* OutSegs */
DCCP_MIB_OUTRSTS,
DCCP_MIB_ABORTONTIMEOUT,
DCCP_MIB_TIMEOUTS,
#define DCCP_ADD_STATS_USER(field, val) \
SNMP_ADD_STATS_USER(dccp_statistics, field, val)
-extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb);
+/*
+ * Checksumming routines
+ */
+static inline unsigned int dccp_csum_coverage(const struct sk_buff *skb)
+{
+ const struct dccp_hdr* dh = dccp_hdr(skb);
+
+ if (dh->dccph_cscov == 0)
+ return skb->len;
+ return (dh->dccph_doff + dh->dccph_cscov - 1) * sizeof(u32);
+}
+
+static inline void dccp_csum_outgoing(struct sk_buff *skb)
+{
+ unsigned int cov = dccp_csum_coverage(skb);
+
+ if (cov >= skb->len)
+ dccp_hdr(skb)->dccph_cscov = 0;
+
+ skb->csum = skb_checksum(skb, 0, (cov > skb->len)? skb->len : cov, 0);
+}
+
+extern void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
+
+extern int dccp_retransmit_skb(struct sock *sk);
extern void dccp_send_ack(struct sock *sk);
-extern void dccp_send_delayed_ack(struct sock *sk);
+extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *rsk);
+
extern void dccp_send_sync(struct sock *sk, const u64 seq,
const enum dccp_pkt_type pkt_type);
extern void dccp_set_state(struct sock *sk, const int state);
extern void dccp_done(struct sock *sk);
-static inline void dccp_openreq_init(struct request_sock *req,
- struct dccp_sock *dp,
- struct sk_buff *skb)
-{
- /*
- * FIXME: fill in the other req fields from the DCCP options
- * received
- */
- inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
- inet_rsk(req)->acked = 0;
- req->rcv_wnd = 0;
-}
+extern int dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp,
+ struct sk_buff const *skb);
extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
const struct dccp_hdr *dh, const unsigned len);
extern int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized);
-extern int dccp_destroy_sock(struct sock *sk);
+extern void dccp_destroy_sock(struct sock *sk);
extern void dccp_close(struct sock *sk, long timeout);
extern struct sk_buff *dccp_make_response(struct sock *sk,
extern int dccp_connect(struct sock *sk);
extern int dccp_disconnect(struct sock *sk, int flags);
-extern void dccp_hash(struct sock *sk);
-extern void dccp_unhash(struct sock *sk);
extern int dccp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
extern int dccp_setsockopt(struct sock *sk, int level, int optname,
extern int inet_dccp_listen(struct socket *sock, int backlog);
extern unsigned int dccp_poll(struct file *file, struct socket *sock,
poll_table *wait);
-extern void dccp_v4_send_check(struct sock *sk, int len,
- struct sk_buff *skb);
extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len);
-extern int dccp_v4_checksum(const struct sk_buff *skb,
- const __be32 saddr, const __be32 daddr);
-
+extern struct sk_buff *dccp_ctl_make_reset(struct sock *sk,
+ struct sk_buff *skb);
extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code);
extern void dccp_send_close(struct sock *sk, const int active);
extern int dccp_invalid_packet(struct sk_buff *skb);
+extern u32 dccp_sample_rtt(struct sock *sk, long delta);
static inline int dccp_bad_service_code(const struct sock *sk,
const __be32 service)
return !dccp_list_has_service(dp->dccps_service_list, service);
}
+/**
+ * dccp_skb_cb - DCCP per-packet control information
+ * @dccpd_type: one of %dccp_pkt_type (or unknown)
+ * @dccpd_ccval: CCVal field (5.1), see e.g. RFC 4342, 8.1
+ * @dccpd_reset_code: one of %dccp_reset_codes
+ * @dccpd_reset_data: Data1..3 fields (depend on @dccpd_reset_code)
+ * @dccpd_opt_len: total length of all options (5.8) in the packet
+ * @dccpd_seq: sequence number
+ * @dccpd_ack_seq: acknowledgment number subheader field value
+ * This is used for transmission as well as for reception.
+ */
struct dccp_skb_cb {
+ union {
+ struct inet_skb_parm h4;
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+ struct inet6_skb_parm h6;
+#endif
+ } header;
__u8 dccpd_type:4;
__u8 dccpd_ccval:4;
- __u8 dccpd_reset_code;
+ __u8 dccpd_reset_code,
+ dccpd_reset_data[3];
__u16 dccpd_opt_len;
__u64 dccpd_seq;
__u64 dccpd_ack_seq;
#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
+/* RFC 4340, sec. 7.7 */
static inline int dccp_non_data_packet(const struct sk_buff *skb)
{
const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
type == DCCP_PKT_SYNCACK;
}
-static inline int dccp_packet_without_ack(const struct sk_buff *skb)
+/* RFC 4340, sec. 7.7 */
+static inline int dccp_data_packet(const struct sk_buff *skb)
{
const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
- return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST;
+ return type == DCCP_PKT_DATA ||
+ type == DCCP_PKT_DATAACK ||
+ type == DCCP_PKT_REQUEST ||
+ type == DCCP_PKT_RESPONSE;
}
-#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1)
-#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2)
-
-static inline void dccp_set_seqno(u64 *seqno, u64 value)
+static inline int dccp_packet_without_ack(const struct sk_buff *skb)
{
- if (value > DCCP_MAX_SEQNO)
- value -= DCCP_MAX_SEQNO + 1;
- *seqno = value;
-}
+ const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
-static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2)
-{
- return ((seqno2 << 16) - (seqno1 << 16)) >> 16;
+ return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST;
}
-static inline void dccp_inc_seqno(u64 *seqno)
-{
- if (++*seqno > DCCP_MAX_SEQNO)
- *seqno = 0;
-}
+#define DCCP_PKT_WITHOUT_ACK_SEQ (UINT48_MAX << 2)
static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss)
{
static inline void dccp_update_gsr(struct sock *sk, u64 seq)
{
struct dccp_sock *dp = dccp_sk(sk);
- const struct dccp_minisock *dmsk = dccp_msk(sk);
dp->dccps_gsr = seq;
- dccp_set_seqno(&dp->dccps_swl,
- dp->dccps_gsr + 1 - (dmsk->dccpms_sequence_window / 4));
- dccp_set_seqno(&dp->dccps_swh,
- dp->dccps_gsr + (3 * dmsk->dccpms_sequence_window) / 4);
+ /* Sequence validity window depends on remote Sequence Window (7.5.1) */
+ dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4);
+ dp->dccps_swh = ADD48(dp->dccps_gsr, (3 * dp->dccps_r_seq_win) / 4);
}
static inline void dccp_update_gss(struct sock *sk, u64 seq)
{
struct dccp_sock *dp = dccp_sk(sk);
- dp->dccps_awh = dp->dccps_gss = seq;
- dccp_set_seqno(&dp->dccps_awl,
- (dp->dccps_gss -
- dccp_msk(sk)->dccpms_sequence_window + 1));
+ dp->dccps_gss = seq;
+ /* Ack validity window depends on local Sequence Window value (7.5.1) */
+ dp->dccps_awl = SUB48(ADD48(dp->dccps_gss, 1), dp->dccps_l_seq_win);
+ dp->dccps_awh = dp->dccps_gss;
}
-
+
static inline int dccp_ack_pending(const struct sock *sk)
{
const struct dccp_sock *dp = dccp_sk(sk);
return dp->dccps_timestamp_echo != 0 ||
-#ifdef CONFIG_IP_DCCP_ACKVEC
- (dccp_msk(sk)->dccpms_send_ack_vector &&
+ (dp->dccps_hc_rx_ackvec != NULL &&
dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
-#endif
inet_csk_ack_scheduled(sk);
}
+extern int dccp_feat_finalise_settings(struct dccp_sock *dp);
+extern int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq);
+extern int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*,
+ struct sk_buff *skb);
+extern int dccp_feat_activate_values(struct sock *sk, struct list_head *fn);
+extern void dccp_feat_list_purge(struct list_head *fn_list);
+
extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
+extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*);
extern int dccp_insert_option_elapsed_time(struct sock *sk,
struct sk_buff *skb,
u32 elapsed_time);
+extern u32 dccp_timestamp(void);
+extern void dccp_timestamping_init(void);
extern int dccp_insert_option_timestamp(struct sock *sk,
struct sk_buff *skb);
extern int dccp_insert_option(struct sock *sk, struct sk_buff *skb,
unsigned char option,
const void *value, unsigned char len);
-extern void dccp_timestamp(const struct sock *sk, struct timeval *tv);
-
-static inline suseconds_t timeval_usecs(const struct timeval *tv)
-{
- return tv->tv_sec * USEC_PER_SEC + tv->tv_usec;
-}
-
-static inline suseconds_t timeval_delta(const struct timeval *large,
- const struct timeval *small)
-{
- time_t secs = large->tv_sec - small->tv_sec;
- suseconds_t usecs = large->tv_usec - small->tv_usec;
-
- if (usecs < 0) {
- secs--;
- usecs += USEC_PER_SEC;
- }
- return secs * USEC_PER_SEC + usecs;
-}
-
-static inline void timeval_add_usecs(struct timeval *tv,
- const suseconds_t usecs)
-{
- tv->tv_usec += usecs;
- while (tv->tv_usec >= USEC_PER_SEC) {
- tv->tv_sec++;
- tv->tv_usec -= USEC_PER_SEC;
- }
-}
-
-static inline void timeval_sub_usecs(struct timeval *tv,
- const suseconds_t usecs)
-{
- tv->tv_usec -= usecs;
- while (tv->tv_usec < 0) {
- tv->tv_sec--;
- tv->tv_usec += USEC_PER_SEC;
- }
-}
-
#ifdef CONFIG_SYSCTL
extern int dccp_sysctl_init(void);
extern void dccp_sysctl_exit(void);