#include <linux/inet.h>
#include <linux/kthread.h>
#include <linux/net.h>
+#include <linux/slab.h>
#include <linux/socket.h>
#include <linux/string.h>
#include <net/tcp.h>
static char tag_ack = CEPH_MSGR_TAG_ACK;
static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE;
+#ifdef CONFIG_LOCKDEP
+static struct lock_class_key socket_class;
+#endif
+
static void queue_con(struct ceph_connection *con);
static void con_work(struct work_struct *);
con->sock = sock;
sock->sk->sk_allocation = GFP_NOFS;
+#ifdef CONFIG_LOCKDEP
+ lockdep_set_class(&sock->sk->sk_lock, &socket_class);
+#endif
+
set_sock_callbacks(sock, con);
dout("connect %s\n", pr_addr(&con->peer_addr.in_addr));
con->out_msg = NULL;
}
con->in_seq = 0;
+ con->in_seq_acked = 0;
}
/*
dout("con_close %p peer %s\n", con, pr_addr(&con->peer_addr.in_addr));
set_bit(CLOSED, &con->state); /* in case there's queued work */
clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */
+ clear_bit(LOSSYTX, &con->state); /* so we retry next connect */
+ clear_bit(KEEPALIVE_PENDING, &con->state);
+ clear_bit(WRITE_PENDING, &con->state);
mutex_lock(&con->mutex);
reset_connection(con);
+ cancel_delayed_work(&con->work);
mutex_unlock(&con->mutex);
queue_con(con);
}
}
/*
+ * return true if this connection ever successfully opened
+ */
+bool ceph_con_opened(struct ceph_connection *con)
+{
+ return con->connect_seq > 0;
+}
+
+/*
* generic get/put
*/
struct ceph_connection *ceph_con_get(struct ceph_connection *con)
struct ceph_msg, list_head);
con->out_msg = m;
if (test_bit(LOSSYTX, &con->state)) {
+ list_del_init(&m->list_head);
+ } else {
/* put message on sent list */
ceph_msg_get(m);
list_move_tail(&m->list_head, &con->out_sent);
- } else {
- list_del_init(&m->list_head);
}
- m->hdr.seq = cpu_to_le64(++con->out_seq);
+ /*
+ * only assign outgoing seq # if we haven't sent this message
+ * yet. if it is requeued, resend with it's original seq.
+ */
+ if (m->needs_out_seq) {
+ m->hdr.seq = cpu_to_le64(++con->out_seq);
+ m->needs_out_seq = false;
+ }
dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n",
m, con->out_seq, le16_to_cpu(m->hdr.type),
con->in_base_pos = 0;
}
-static void prepare_read_connect_retry(struct ceph_connection *con)
-{
- dout("prepare_read_connect_retry %p\n", con);
- con->in_base_pos = strlen(CEPH_BANNER) + sizeof(con->actual_peer_addr)
- + sizeof(con->peer_addr_for_me);
-}
-
static void prepare_read_ack(struct ceph_connection *con)
{
dout("prepare_read_ack %p\n", con);
}
con->auth_retry = 1;
prepare_write_connect(con->msgr, con, 0);
- prepare_read_connect_retry(con);
+ prepare_read_connect(con);
break;
case CEPH_MSGR_TAG_RESETSESSION:
unsigned front_len, middle_len, data_len, data_off;
int datacrc = con->msgr->nocrc;
int skip;
+ u64 seq;
dout("read_partial_message con %p msg %p\n", con, m);
return -EIO;
data_off = le16_to_cpu(con->in_hdr.data_off);
+ /* verify seq# */
+ seq = le64_to_cpu(con->in_hdr.seq);
+ if ((s64)seq - (s64)con->in_seq < 1) {
+ pr_info("skipping %s%lld %s seq %lld, expected %lld\n",
+ ENTITY_NAME(con->peer_name),
+ pr_addr(&con->peer_addr.in_addr),
+ seq, con->in_seq + 1);
+ con->in_base_pos = -front_len - middle_len - data_len -
+ sizeof(m->footer);
+ con->in_tag = CEPH_MSGR_TAG_READY;
+ con->in_seq++;
+ return 0;
+ } else if ((s64)seq - (s64)con->in_seq > 1) {
+ pr_err("read_partial_message bad seq %lld expected %lld\n",
+ seq, con->in_seq + 1);
+ con->error_msg = "bad message sequence # for incoming message";
+ return -EBADMSG;
+ }
+
/* allocate message? */
if (!con->in_msg) {
dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip);
if (skip) {
/* skip this message */
- pr_err("alloc_msg returned NULL, skipping message\n");
+ dout("alloc_msg returned NULL, skipping message\n");
con->in_base_pos = -front_len - middle_len - data_len -
sizeof(m->footer);
con->in_tag = CEPH_MSGR_TAG_READY;
+ con->in_seq++;
return 0;
}
if (IS_ERR(con->in_msg)) {
ret = PTR_ERR(con->in_msg);
con->in_msg = NULL;
- con->error_msg = "error allocating memory for incoming message";
+ con->error_msg =
+ "error allocating memory for incoming message";
return ret;
}
m = con->in_msg;
clear_bit(BUSY, &con->state);
dout("con->state=%lu\n", con->state);
if (test_bit(QUEUED, &con->state)) {
- if (!backoff) {
+ if (!backoff || test_bit(OPENING, &con->state)) {
dout("con_work %p QUEUED reset, looping\n", con);
goto more;
}
goto out;
}
- clear_bit(BUSY, &con->state); /* to avoid an improbable race */
-
mutex_lock(&con->mutex);
+ if (test_bit(CLOSED, &con->state))
+ goto out_unlock;
con_close_socket(con);
con->in_msg = NULL;
}
+ /* Requeue anything that hasn't been acked */
+ list_splice_init(&con->out_sent, &con->out_queue);
+
+ /* If there are no messages in the queue, place the connection
+ * in a STANDBY state (i.e., don't try to reconnect just yet). */
+ if (list_empty(&con->out_queue) && !con->out_keepalive_pending) {
+ dout("fault setting STANDBY\n");
+ set_bit(STANDBY, &con->state);
+ } else {
+ /* retry after a delay. */
+ if (con->delay == 0)
+ con->delay = BASE_DELAY_INTERVAL;
+ else if (con->delay < MAX_DELAY_INTERVAL)
+ con->delay *= 2;
+ dout("fault queueing %p delay %lu\n", con, con->delay);
+ con->ops->get(con);
+ if (queue_delayed_work(ceph_msgr_wq, &con->work,
+ round_jiffies_relative(con->delay)) == 0)
+ con->ops->put(con);
+ }
+
+out_unlock:
+ mutex_unlock(&con->mutex);
+out:
/*
* in case we faulted due to authentication, invalidate our
* current tickets so that we can get new ones.
con->ops->invalidate_authorizer(con);
}
- /* If there are no messages in the queue, place the connection
- * in a STANDBY state (i.e., don't try to reconnect just yet). */
- if (list_empty(&con->out_queue) && !con->out_keepalive_pending) {
- dout("fault setting STANDBY\n");
- set_bit(STANDBY, &con->state);
- mutex_unlock(&con->mutex);
- goto out;
- }
-
- /* Requeue anything that hasn't been acked, and retry after a
- * delay. */
- list_splice_init(&con->out_sent, &con->out_queue);
-
- if (con->delay == 0)
- con->delay = BASE_DELAY_INTERVAL;
- else if (con->delay < MAX_DELAY_INTERVAL)
- con->delay *= 2;
-
- mutex_unlock(&con->mutex);
-
- /* explicitly schedule work to try to reconnect again later. */
- dout("fault queueing %p delay %lu\n", con, con->delay);
- con->ops->get(con);
- if (queue_delayed_work(ceph_msgr_wq, &con->work,
- round_jiffies_relative(con->delay)) == 0)
- con->ops->put(con);
-
-out:
if (con->ops->fault)
con->ops->fault(con);
}
msg->hdr.src.addr = con->msgr->my_enc_addr;
msg->hdr.orig_src = msg->hdr.src;
+ BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));
+
+ msg->needs_out_seq = true;
+
/* queue */
mutex_lock(&con->mutex);
BUG_ON(!list_empty(&msg->list_head));
ceph_msg_put(con->in_msg);
con->in_msg = NULL;
con->in_tag = CEPH_MSGR_TAG_READY;
+ con->in_seq++;
} else {
dout("con_revoke_pages %p msg %p pages %p no-op\n",
con, con->in_msg, msg);
kref_init(&m->kref);
INIT_LIST_HEAD(&m->list_head);
+ m->hdr.tid = 0;
m->hdr.type = cpu_to_le16(type);
+ m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
+ m->hdr.version = 0;
m->hdr.front_len = cpu_to_le32(front_len);
m->hdr.middle_len = 0;
m->hdr.data_len = cpu_to_le32(page_len);
m->hdr.data_off = cpu_to_le16(page_off);
- m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
+ m->hdr.reserved = 0;
m->footer.front_crc = 0;
m->footer.middle_crc = 0;
m->footer.data_crc = 0;
+ m->footer.flags = 0;
m->front_max = front_len;
m->front_is_vmalloc = false;
m->more_to_follow = false;