X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fceph%2Fmessenger.c;h=cd4fadb6491afe3a48ad5081b5f4ab2a41e4b0bf;hb=3374cd1abd478f767aaedf2c21d109596ff0fe72;hp=0ddc2c75f6b407f4ddd7a4384308f301df211668;hpb=e80a52d14f868059e8ec790c9fae88cdb8a1df98;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 0ddc2c7..cd4fadb 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -29,6 +30,10 @@ static char tag_msg = CEPH_MSGR_TAG_MSG; static char tag_ack = CEPH_MSGR_TAG_ACK; static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; +#ifdef CONFIG_LOCKDEP +static struct lock_class_key socket_class; +#endif + static void queue_con(struct ceph_connection *con); static void con_work(struct work_struct *); @@ -227,6 +232,10 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con) con->sock = sock; sock->sk->sk_allocation = GFP_NOFS; +#ifdef CONFIG_LOCKDEP + lockdep_set_class(&sock->sk->sk_lock, &socket_class); +#endif + set_sock_callbacks(sock, con); dout("connect %s\n", pr_addr(&con->peer_addr.in_addr)); @@ -332,6 +341,7 @@ static void reset_connection(struct ceph_connection *con) con->out_msg = NULL; } con->in_seq = 0; + con->in_seq_acked = 0; } /* @@ -342,6 +352,9 @@ void ceph_con_close(struct ceph_connection *con) dout("con_close %p peer %s\n", con, pr_addr(&con->peer_addr.in_addr)); set_bit(CLOSED, &con->state); /* in case there's queued work */ clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */ + clear_bit(LOSSYTX, &con->state); /* so we retry next connect */ + clear_bit(KEEPALIVE_PENDING, &con->state); + clear_bit(WRITE_PENDING, &con->state); mutex_lock(&con->mutex); reset_connection(con); cancel_delayed_work(&con->work); @@ -363,6 +376,14 @@ void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) } /* + * return true if this connection ever successfully opened + */ +bool ceph_con_opened(struct ceph_connection *con) +{ + return con->connect_seq > 0; +} + +/* * generic get/put */ struct ceph_connection *ceph_con_get(struct ceph_connection *con) @@ -471,7 +492,14 @@ static void prepare_write_message(struct ceph_connection *con) list_move_tail(&m->list_head, &con->out_sent); } - m->hdr.seq = cpu_to_le64(++con->out_seq); + /* + * only assign outgoing seq # if we haven't sent this message + * yet. if it is requeued, resend with it's original seq. + */ + if (m->needs_out_seq) { + m->hdr.seq = cpu_to_le64(++con->out_seq); + m->needs_out_seq = false; + } dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", m, con->out_seq, le16_to_cpu(m->hdr.type), @@ -827,13 +855,6 @@ static void prepare_read_connect(struct ceph_connection *con) con->in_base_pos = 0; } -static void prepare_read_connect_retry(struct ceph_connection *con) -{ - dout("prepare_read_connect_retry %p\n", con); - con->in_base_pos = strlen(CEPH_BANNER) + sizeof(con->actual_peer_addr) - + sizeof(con->peer_addr_for_me); -} - static void prepare_read_ack(struct ceph_connection *con) { dout("prepare_read_ack %p\n", con); @@ -1143,7 +1164,7 @@ static int process_connect(struct ceph_connection *con) } con->auth_retry = 1; prepare_write_connect(con->msgr, con, 0); - prepare_read_connect_retry(con); + prepare_read_connect(con); break; case CEPH_MSGR_TAG_RESETSESSION: @@ -1320,6 +1341,7 @@ static int read_partial_message(struct ceph_connection *con) unsigned front_len, middle_len, data_len, data_off; int datacrc = con->msgr->nocrc; int skip; + u64 seq; dout("read_partial_message con %p msg %p\n", con, m); @@ -1354,6 +1376,25 @@ static int read_partial_message(struct ceph_connection *con) return -EIO; data_off = le16_to_cpu(con->in_hdr.data_off); + /* verify seq# */ + seq = le64_to_cpu(con->in_hdr.seq); + if ((s64)seq - (s64)con->in_seq < 1) { + pr_info("skipping %s%lld %s seq %lld, expected %lld\n", + ENTITY_NAME(con->peer_name), + pr_addr(&con->peer_addr.in_addr), + seq, con->in_seq + 1); + con->in_base_pos = -front_len - middle_len - data_len - + sizeof(m->footer); + con->in_tag = CEPH_MSGR_TAG_READY; + con->in_seq++; + return 0; + } else if ((s64)seq - (s64)con->in_seq > 1) { + pr_err("read_partial_message bad seq %lld expected %lld\n", + seq, con->in_seq + 1); + con->error_msg = "bad message sequence # for incoming message"; + return -EBADMSG; + } + /* allocate message? */ if (!con->in_msg) { dout("got hdr type %d front %d data %d\n", con->in_hdr.type, @@ -1365,6 +1406,7 @@ static int read_partial_message(struct ceph_connection *con) con->in_base_pos = -front_len - middle_len - data_len - sizeof(m->footer); con->in_tag = CEPH_MSGR_TAG_READY; + con->in_seq++; return 0; } if (IS_ERR(con->in_msg)) { @@ -1840,8 +1882,6 @@ static void ceph_fault(struct ceph_connection *con) goto out; } - clear_bit(BUSY, &con->state); /* to avoid an improbable race */ - mutex_lock(&con->mutex); if (test_bit(CLOSED, &con->state)) goto out_unlock; @@ -1951,6 +1991,10 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) msg->hdr.src.addr = con->msgr->my_enc_addr; msg->hdr.orig_src = msg->hdr.src; + BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); + + msg->needs_out_seq = true; + /* queue */ mutex_lock(&con->mutex); BUG_ON(!list_empty(&msg->list_head)); @@ -2016,6 +2060,7 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg) ceph_msg_put(con->in_msg); con->in_msg = NULL; con->in_tag = CEPH_MSGR_TAG_READY; + con->in_seq++; } else { dout("con_revoke_pages %p msg %p pages %p no-op\n", con, con->in_msg, msg); @@ -2049,15 +2094,19 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, kref_init(&m->kref); INIT_LIST_HEAD(&m->list_head); + m->hdr.tid = 0; m->hdr.type = cpu_to_le16(type); + m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); + m->hdr.version = 0; m->hdr.front_len = cpu_to_le32(front_len); m->hdr.middle_len = 0; m->hdr.data_len = cpu_to_le32(page_len); m->hdr.data_off = cpu_to_le16(page_off); - m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); + m->hdr.reserved = 0; m->footer.front_crc = 0; m->footer.middle_crc = 0; m->footer.data_crc = 0; + m->footer.flags = 0; m->front_max = front_len; m->front_is_vmalloc = false; m->more_to_follow = false;