SUNRPC: Allow RPCs to fail quickly if the server is unreachable
[safe/jmp/linux-2.6] / net / sunrpc / xprtsock.c
index f05a56e..ff312f8 100644 (file)
 #include <linux/tcp.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/xprtsock.h>
 #include <linux/file.h>
+#ifdef CONFIG_NFS_V4_1
+#include <linux/sunrpc/bc_xprt.h>
+#endif
 
 #include <net/sock.h>
 #include <net/checksum.h>
 #include <net/udp.h>
 #include <net/tcp.h>
 
+#include "sunrpc.h"
 /*
  * xprtsock tunables
  */
@@ -245,8 +250,8 @@ struct sock_xprt {
         * Connection of transports
         */
        struct delayed_work     connect_worker;
-       struct sockaddr_storage addr;
-       unsigned short          port;
+       struct sockaddr_storage srcaddr;
+       unsigned short          srcport;
 
        /*
         * UDP socket buffer size parameters
@@ -270,6 +275,13 @@ struct sock_xprt {
 #define TCP_RCV_COPY_FRAGHDR   (1UL << 1)
 #define TCP_RCV_COPY_XID       (1UL << 2)
 #define TCP_RCV_COPY_DATA      (1UL << 3)
+#define TCP_RCV_READ_CALLDIR   (1UL << 4)
+#define TCP_RCV_COPY_CALLDIR   (1UL << 5)
+
+/*
+ * TCP RPC flags
+ */
+#define TCP_RPC_REPLY          (1UL << 6)
 
 static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt)
 {
@@ -286,117 +298,60 @@ static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt)
        return (struct sockaddr_in6 *) &xprt->addr;
 }
 
-static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt,
-                                         const char *protocol,
-                                         const char *netid)
+static void xs_format_common_peer_addresses(struct rpc_xprt *xprt)
 {
-       struct sockaddr_in *addr = xs_addr_in(xprt);
-       char *buf;
-
-       buf = kzalloc(20, GFP_KERNEL);
-       if (buf) {
-               snprintf(buf, 20, "%pI4", &addr->sin_addr.s_addr);
-       }
-       xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
+       struct sockaddr *sap = xs_addr(xprt);
+       struct sockaddr_in6 *sin6;
+       struct sockaddr_in *sin;
+       char buf[128];
 
-       buf = kzalloc(8, GFP_KERNEL);
-       if (buf) {
-               snprintf(buf, 8, "%u",
-                               ntohs(addr->sin_port));
-       }
-       xprt->address_strings[RPC_DISPLAY_PORT] = buf;
-
-       xprt->address_strings[RPC_DISPLAY_PROTO] = protocol;
-
-       buf = kzalloc(48, GFP_KERNEL);
-       if (buf) {
-               snprintf(buf, 48, "addr=%pI4 port=%u proto=%s",
-                       &addr->sin_addr.s_addr,
-                       ntohs(addr->sin_port),
-                       protocol);
-       }
-       xprt->address_strings[RPC_DISPLAY_ALL] = buf;
+       (void)rpc_ntop(sap, buf, sizeof(buf));
+       xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL);
 
-       buf = kzalloc(10, GFP_KERNEL);
-       if (buf) {
-               snprintf(buf, 10, "%02x%02x%02x%02x",
-                               NIPQUAD(addr->sin_addr.s_addr));
-       }
-       xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf;
-
-       buf = kzalloc(8, GFP_KERNEL);
-       if (buf) {
-               snprintf(buf, 8, "%4hx",
-                               ntohs(addr->sin_port));
-       }
-       xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf;
-
-       buf = kzalloc(30, GFP_KERNEL);
-       if (buf) {
-               snprintf(buf, 30, "%pI4.%u.%u",
-                               &addr->sin_addr.s_addr,
-                               ntohs(addr->sin_port) >> 8,
-                               ntohs(addr->sin_port) & 0xff);
+       switch (sap->sa_family) {
+       case AF_INET:
+               sin = xs_addr_in(xprt);
+               (void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x",
+                                       NIPQUAD(sin->sin_addr.s_addr));
+               break;
+       case AF_INET6:
+               sin6 = xs_addr_in6(xprt);
+               (void)snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);
+               break;
+       default:
+               BUG();
        }
-       xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
-
-       xprt->address_strings[RPC_DISPLAY_NETID] = netid;
+       xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
 }
 
-static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt,
-                                         const char *protocol,
-                                         const char *netid)
+static void xs_format_common_peer_ports(struct rpc_xprt *xprt)
 {
-       struct sockaddr_in6 *addr = xs_addr_in6(xprt);
-       char *buf;
+       struct sockaddr *sap = xs_addr(xprt);
+       char buf[128];
 
-       buf = kzalloc(40, GFP_KERNEL);
-       if (buf) {
-               snprintf(buf, 40, "%pI6",&addr->sin6_addr);
-       }
-       xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
+       (void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
+       xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
 
-       buf = kzalloc(8, GFP_KERNEL);
-       if (buf) {
-               snprintf(buf, 8, "%u",
-                               ntohs(addr->sin6_port));
-       }
-       xprt->address_strings[RPC_DISPLAY_PORT] = buf;
+       (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
+       xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
+}
 
+static void xs_format_peer_addresses(struct rpc_xprt *xprt,
+                                    const char *protocol,
+                                    const char *netid)
+{
        xprt->address_strings[RPC_DISPLAY_PROTO] = protocol;
+       xprt->address_strings[RPC_DISPLAY_NETID] = netid;
+       xs_format_common_peer_addresses(xprt);
+       xs_format_common_peer_ports(xprt);
+}
 
-       buf = kzalloc(64, GFP_KERNEL);
-       if (buf) {
-               snprintf(buf, 64, "addr=%pI6 port=%u proto=%s",
-                               &addr->sin6_addr,
-                               ntohs(addr->sin6_port),
-                               protocol);
-       }
-       xprt->address_strings[RPC_DISPLAY_ALL] = buf;
-
-       buf = kzalloc(36, GFP_KERNEL);
-       if (buf)
-               snprintf(buf, 36, "%pi6", &addr->sin6_addr);
-
-       xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf;
-
-       buf = kzalloc(8, GFP_KERNEL);
-       if (buf) {
-               snprintf(buf, 8, "%4hx",
-                               ntohs(addr->sin6_port));
-       }
-       xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf;
-
-       buf = kzalloc(50, GFP_KERNEL);
-       if (buf) {
-               snprintf(buf, 50, "%pI6.%u.%u",
-                        &addr->sin6_addr,
-                        ntohs(addr->sin6_port) >> 8,
-                        ntohs(addr->sin6_port) & 0xff);
-       }
-       xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
+static void xs_update_peer_port(struct rpc_xprt *xprt)
+{
+       kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
+       kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
 
-       xprt->address_strings[RPC_DISPLAY_NETID] = netid;
+       xs_format_common_peer_ports(xprt);
 }
 
 static void xs_free_peer_addresses(struct rpc_xprt *xprt)
@@ -726,10 +681,10 @@ static int xs_tcp_send_request(struct rpc_task *task)
                dprintk("RPC:       sendmsg returned unrecognized error %d\n",
                        -status);
        case -ECONNRESET:
+       case -EPIPE:
                xs_tcp_shutdown(xprt);
        case -ECONNREFUSED:
        case -ENOTCONN:
-       case -EPIPE:
                clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
        }
 out:
@@ -807,6 +762,9 @@ static void xs_reset_transport(struct sock_xprt *transport)
  *
  * This is used when all requests are complete; ie, no DRC state remains
  * on the server we want to save.
+ *
+ * The caller _must_ be holding XPRT_LOCKED in order to avoid issues with
+ * xs_reset_transport() zeroing the socket from underneath a writer.
  */
 static void xs_close(struct rpc_xprt *xprt)
 {
@@ -815,6 +773,7 @@ static void xs_close(struct rpc_xprt *xprt)
        dprintk("RPC:       xs_close xprt %p\n", xprt);
 
        xs_reset_transport(transport);
+       xprt->reestablish_timeout = 0;
 
        smp_mb__before_clear_bit();
        clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
@@ -824,6 +783,14 @@ static void xs_close(struct rpc_xprt *xprt)
        xprt_disconnect_done(xprt);
 }
 
+static void xs_tcp_close(struct rpc_xprt *xprt)
+{
+       if (test_and_clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state))
+               xs_close(xprt);
+       else
+               xs_tcp_shutdown(xprt);
+}
+
 /**
  * xs_destroy - prepare to shutdown a transport
  * @xprt: doomed transport
@@ -907,7 +874,7 @@ static void xs_udp_data_ready(struct sock *sk, int len)
        UDPX_INC_STATS_BH(sk, UDP_MIB_INDATAGRAMS);
 
        /* Something worked... */
-       dst_confirm(skb->dst);
+       dst_confirm(skb_dst(skb));
 
        xprt_adjust_cwnd(task, copied);
        xprt_update_rtt(task);
@@ -945,7 +912,7 @@ static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_rea
        transport->tcp_offset = 0;
 
        /* Sanity check of the record length */
-       if (unlikely(transport->tcp_reclen < 4)) {
+       if (unlikely(transport->tcp_reclen < 8)) {
                dprintk("RPC:       invalid TCP record fragment length\n");
                xprt_force_disconnect(xprt);
                return;
@@ -980,33 +947,77 @@ static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_r
        if (used != len)
                return;
        transport->tcp_flags &= ~TCP_RCV_COPY_XID;
-       transport->tcp_flags |= TCP_RCV_COPY_DATA;
+       transport->tcp_flags |= TCP_RCV_READ_CALLDIR;
        transport->tcp_copied = 4;
-       dprintk("RPC:       reading reply for XID %08x\n",
+       dprintk("RPC:       reading %s XID %08x\n",
+                       (transport->tcp_flags & TCP_RPC_REPLY) ? "reply for"
+                                                             : "request with",
                        ntohl(transport->tcp_xid));
        xs_tcp_check_fraghdr(transport);
 }
 
-static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_reader *desc)
+static inline void xs_tcp_read_calldir(struct sock_xprt *transport,
+                                      struct xdr_skb_reader *desc)
 {
-       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
-       struct rpc_rqst *req;
+       size_t len, used;
+       u32 offset;
+       __be32  calldir;
+
+       /*
+        * We want transport->tcp_offset to be 8 at the end of this routine
+        * (4 bytes for the xid and 4 bytes for the call/reply flag).
+        * When this function is called for the first time,
+        * transport->tcp_offset is 4 (after having already read the xid).
+        */
+       offset = transport->tcp_offset - sizeof(transport->tcp_xid);
+       len = sizeof(calldir) - offset;
+       dprintk("RPC:       reading CALL/REPLY flag (%Zu bytes)\n", len);
+       used = xdr_skb_read_bits(desc, &calldir, len);
+       transport->tcp_offset += used;
+       if (used != len)
+               return;
+       transport->tcp_flags &= ~TCP_RCV_READ_CALLDIR;
+       transport->tcp_flags |= TCP_RCV_COPY_CALLDIR;
+       transport->tcp_flags |= TCP_RCV_COPY_DATA;
+       /*
+        * We don't yet have the XDR buffer, so we will write the calldir
+        * out after we get the buffer from the 'struct rpc_rqst'
+        */
+       if (ntohl(calldir) == RPC_REPLY)
+               transport->tcp_flags |= TCP_RPC_REPLY;
+       else
+               transport->tcp_flags &= ~TCP_RPC_REPLY;
+       dprintk("RPC:       reading %s CALL/REPLY flag %08x\n",
+                       (transport->tcp_flags & TCP_RPC_REPLY) ?
+                               "reply for" : "request with", calldir);
+       xs_tcp_check_fraghdr(transport);
+}
+
+static inline void xs_tcp_read_common(struct rpc_xprt *xprt,
+                                    struct xdr_skb_reader *desc,
+                                    struct rpc_rqst *req)
+{
+       struct sock_xprt *transport =
+                               container_of(xprt, struct sock_xprt, xprt);
        struct xdr_buf *rcvbuf;
        size_t len;
        ssize_t r;
 
-       /* Find and lock the request corresponding to this xid */
-       spin_lock(&xprt->transport_lock);
-       req = xprt_lookup_rqst(xprt, transport->tcp_xid);
-       if (!req) {
-               transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
-               dprintk("RPC:       XID %08x request not found!\n",
-                               ntohl(transport->tcp_xid));
-               spin_unlock(&xprt->transport_lock);
-               return;
+       rcvbuf = &req->rq_private_buf;
+
+       if (transport->tcp_flags & TCP_RCV_COPY_CALLDIR) {
+               /*
+                * Save the RPC direction in the XDR buffer
+                */
+               __be32  calldir = transport->tcp_flags & TCP_RPC_REPLY ?
+                                       htonl(RPC_REPLY) : 0;
+
+               memcpy(rcvbuf->head[0].iov_base + transport->tcp_copied,
+                       &calldir, sizeof(calldir));
+               transport->tcp_copied += sizeof(calldir);
+               transport->tcp_flags &= ~TCP_RCV_COPY_CALLDIR;
        }
 
-       rcvbuf = &req->rq_private_buf;
        len = desc->count;
        if (len > transport->tcp_reclen - transport->tcp_offset) {
                struct xdr_skb_reader my_desc;
@@ -1043,7 +1054,7 @@ static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_rea
                                "tcp_offset = %u, tcp_reclen = %u\n",
                                xprt, transport->tcp_copied,
                                transport->tcp_offset, transport->tcp_reclen);
-               goto out;
+               return;
        }
 
        dprintk("RPC:       XID %08x read %Zd bytes\n",
@@ -1059,11 +1070,125 @@ static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_rea
                        transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
        }
 
-out:
+       return;
+}
+
+/*
+ * Finds the request corresponding to the RPC xid and invokes the common
+ * tcp read code to read the data.
+ */
+static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
+                                   struct xdr_skb_reader *desc)
+{
+       struct sock_xprt *transport =
+                               container_of(xprt, struct sock_xprt, xprt);
+       struct rpc_rqst *req;
+
+       dprintk("RPC:       read reply XID %08x\n", ntohl(transport->tcp_xid));
+
+       /* Find and lock the request corresponding to this xid */
+       spin_lock(&xprt->transport_lock);
+       req = xprt_lookup_rqst(xprt, transport->tcp_xid);
+       if (!req) {
+               dprintk("RPC:       XID %08x request not found!\n",
+                               ntohl(transport->tcp_xid));
+               spin_unlock(&xprt->transport_lock);
+               return -1;
+       }
+
+       xs_tcp_read_common(xprt, desc, req);
+
        if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
                xprt_complete_rqst(req->rq_task, transport->tcp_copied);
+
        spin_unlock(&xprt->transport_lock);
-       xs_tcp_check_fraghdr(transport);
+       return 0;
+}
+
+#if defined(CONFIG_NFS_V4_1)
+/*
+ * Obtains an rpc_rqst previously allocated and invokes the common
+ * tcp read code to read the data.  The result is placed in the callback
+ * queue.
+ * If we're unable to obtain the rpc_rqst we schedule the closing of the
+ * connection and return -1.
+ */
+static inline int xs_tcp_read_callback(struct rpc_xprt *xprt,
+                                      struct xdr_skb_reader *desc)
+{
+       struct sock_xprt *transport =
+                               container_of(xprt, struct sock_xprt, xprt);
+       struct rpc_rqst *req;
+
+       req = xprt_alloc_bc_request(xprt);
+       if (req == NULL) {
+               printk(KERN_WARNING "Callback slot table overflowed\n");
+               xprt_force_disconnect(xprt);
+               return -1;
+       }
+
+       req->rq_xid = transport->tcp_xid;
+       dprintk("RPC:       read callback  XID %08x\n", ntohl(req->rq_xid));
+       xs_tcp_read_common(xprt, desc, req);
+
+       if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) {
+               struct svc_serv *bc_serv = xprt->bc_serv;
+
+               /*
+                * Add callback request to callback list.  The callback
+                * service sleeps on the sv_cb_waitq waiting for new
+                * requests.  Wake it up after enqueuing the
+                * request.
+                */
+               dprintk("RPC:       add callback request to list\n");
+               spin_lock(&bc_serv->sv_cb_lock);
+               list_add(&req->rq_bc_list, &bc_serv->sv_cb_list);
+               spin_unlock(&bc_serv->sv_cb_lock);
+               wake_up(&bc_serv->sv_cb_waitq);
+       }
+
+       req->rq_private_buf.len = transport->tcp_copied;
+
+       return 0;
+}
+
+static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
+                                       struct xdr_skb_reader *desc)
+{
+       struct sock_xprt *transport =
+                               container_of(xprt, struct sock_xprt, xprt);
+
+       return (transport->tcp_flags & TCP_RPC_REPLY) ?
+               xs_tcp_read_reply(xprt, desc) :
+               xs_tcp_read_callback(xprt, desc);
+}
+#else
+static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
+                                       struct xdr_skb_reader *desc)
+{
+       return xs_tcp_read_reply(xprt, desc);
+}
+#endif /* CONFIG_NFS_V4_1 */
+
+/*
+ * Read data off the transport.  This can be either an RPC_CALL or an
+ * RPC_REPLY.  Relay the processing to helper functions.
+ */
+static void xs_tcp_read_data(struct rpc_xprt *xprt,
+                                   struct xdr_skb_reader *desc)
+{
+       struct sock_xprt *transport =
+                               container_of(xprt, struct sock_xprt, xprt);
+
+       if (_xs_tcp_read_data(xprt, desc) == 0)
+               xs_tcp_check_fraghdr(transport);
+       else {
+               /*
+                * The transport_lock protects the request handling.
+                * There's no need to hold it to update the tcp_flags.
+                */
+               transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
+       }
 }
 
 static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_skb_reader *desc)
@@ -1103,9 +1228,14 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
                        xs_tcp_read_xid(transport, &desc);
                        continue;
                }
+               /* Read in the call/reply flag */
+               if (transport->tcp_flags & TCP_RCV_READ_CALLDIR) {
+                       xs_tcp_read_calldir(transport, &desc);
+                       continue;
+               }
                /* Read in the request data */
                if (transport->tcp_flags & TCP_RCV_COPY_DATA) {
-                       xs_tcp_read_request(xprt, &desc);
+                       xs_tcp_read_data(xprt, &desc);
                        continue;
                }
                /* Skip over any trailing bytes on short reads */
@@ -1135,6 +1265,12 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes)
        if (xprt->shutdown)
                goto out;
 
+       /* Any data means we had a useful conversation, so
+        * we don't need to delay the next reconnect
+        */
+       if (xprt->reestablish_timeout)
+               xprt->reestablish_timeout = 0;
+
        /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
        rd_desc.arg.data = xprt;
        do {
@@ -1280,6 +1416,23 @@ out:
        read_unlock(&sk->sk_callback_lock);
 }
 
+static void xs_write_space(struct sock *sk)
+{
+       struct socket *sock;
+       struct rpc_xprt *xprt;
+
+       if (unlikely(!(sock = sk->sk_socket)))
+               return;
+       clear_bit(SOCK_NOSPACE, &sock->flags);
+
+       if (unlikely(!(xprt = xprt_from_sock(sk))))
+               return;
+       if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
+               return;
+
+       xprt_write_space(xprt);
+}
+
 /**
  * xs_udp_write_space - callback invoked when socket buffer space
  *                             becomes available
@@ -1295,23 +1448,9 @@ static void xs_udp_write_space(struct sock *sk)
        read_lock(&sk->sk_callback_lock);
 
        /* from net/core/sock.c:sock_def_write_space */
-       if (sock_writeable(sk)) {
-               struct socket *sock;
-               struct rpc_xprt *xprt;
+       if (sock_writeable(sk))
+               xs_write_space(sk);
 
-               if (unlikely(!(sock = sk->sk_socket)))
-                       goto out;
-               clear_bit(SOCK_NOSPACE, &sock->flags);
-
-               if (unlikely(!(xprt = xprt_from_sock(sk))))
-                       goto out;
-               if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
-                       goto out;
-
-               xprt_write_space(xprt);
-       }
-
- out:
        read_unlock(&sk->sk_callback_lock);
 }
 
@@ -1330,23 +1469,9 @@ static void xs_tcp_write_space(struct sock *sk)
        read_lock(&sk->sk_callback_lock);
 
        /* from net/core/stream.c:sk_stream_write_space */
-       if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
-               struct socket *sock;
-               struct rpc_xprt *xprt;
-
-               if (unlikely(!(sock = sk->sk_socket)))
-                       goto out;
-               clear_bit(SOCK_NOSPACE, &sock->flags);
+       if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
+               xs_write_space(sk);
 
-               if (unlikely(!(xprt = xprt_from_sock(sk))))
-                       goto out;
-               if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
-                       goto out;
-
-               xprt_write_space(xprt);
-       }
-
- out:
        read_unlock(&sk->sk_callback_lock);
 }
 
@@ -1414,25 +1539,15 @@ static unsigned short xs_get_random_port(void)
  */
 static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
 {
-       struct sockaddr *addr = xs_addr(xprt);
-
        dprintk("RPC:       setting port for xprt %p to %u\n", xprt, port);
 
-       switch (addr->sa_family) {
-       case AF_INET:
-               ((struct sockaddr_in *)addr)->sin_port = htons(port);
-               break;
-       case AF_INET6:
-               ((struct sockaddr_in6 *)addr)->sin6_port = htons(port);
-               break;
-       default:
-               BUG();
-       }
+       rpc_set_port(xs_addr(xprt), port);
+       xs_update_peer_port(xprt);
 }
 
 static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket *sock)
 {
-       unsigned short port = transport->port;
+       unsigned short port = transport->srcport;
 
        if (port == 0 && transport->xprt.resvport)
                port = xs_get_random_port();
@@ -1441,8 +1556,8 @@ static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket
 
 static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket *sock, unsigned short port)
 {
-       if (transport->port != 0)
-               transport->port = 0;
+       if (transport->srcport != 0)
+               transport->srcport = 0;
        if (!transport->xprt.resvport)
                return 0;
        if (port <= xprt_min_resvport || port > xprt_max_resvport)
@@ -1460,7 +1575,7 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
        unsigned short port = xs_get_srcport(transport, sock);
        unsigned short last;
 
-       sa = (struct sockaddr_in *)&transport->addr;
+       sa = (struct sockaddr_in *)&transport->srcaddr;
        myaddr.sin_addr = sa->sin_addr;
        do {
                myaddr.sin_port = htons(port);
@@ -1469,7 +1584,7 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
                if (port == 0)
                        break;
                if (err == 0) {
-                       transport->port = port;
+                       transport->srcport = port;
                        break;
                }
                last = port;
@@ -1493,7 +1608,7 @@ static int xs_bind6(struct sock_xprt *transport, struct socket *sock)
        unsigned short port = xs_get_srcport(transport, sock);
        unsigned short last;
 
-       sa = (struct sockaddr_in6 *)&transport->addr;
+       sa = (struct sockaddr_in6 *)&transport->srcaddr;
        myaddr.sin6_addr = sa->sin6_addr;
        do {
                myaddr.sin6_port = htons(port);
@@ -1502,7 +1617,7 @@ static int xs_bind6(struct sock_xprt *transport, struct socket *sock)
                if (port == 0)
                        break;
                if (err == 0) {
-                       transport->port = port;
+                       transport->srcport = port;
                        break;
                }
                last = port;
@@ -1607,8 +1722,11 @@ static void xs_udp_connect_worker4(struct work_struct *work)
                goto out;
        }
 
-       dprintk("RPC:       worker connecting xprt %p to address: %s\n",
-                       xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
+       dprintk("RPC:       worker connecting xprt %p via %s to "
+                               "%s (port %s)\n", xprt,
+                       xprt->address_strings[RPC_DISPLAY_PROTO],
+                       xprt->address_strings[RPC_DISPLAY_ADDR],
+                       xprt->address_strings[RPC_DISPLAY_PORT]);
 
        xs_udp_finish_connecting(xprt, sock);
        status = 0;
@@ -1649,8 +1767,11 @@ static void xs_udp_connect_worker6(struct work_struct *work)
                goto out;
        }
 
-       dprintk("RPC:       worker connecting xprt %p to address: %s\n",
-                       xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
+       dprintk("RPC:       worker connecting xprt %p via %s to "
+                               "%s (port %s)\n", xprt,
+                       xprt->address_strings[RPC_DISPLAY_PROTO],
+                       xprt->address_strings[RPC_DISPLAY_ADDR],
+                       xprt->address_strings[RPC_DISPLAY_PORT]);
 
        xs_udp_finish_connecting(xprt, sock);
        status = 0;
@@ -1775,14 +1896,27 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
                        goto out_eagain;
        }
 
-       dprintk("RPC:       worker connecting xprt %p to address: %s\n",
-                       xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
+       dprintk("RPC:       worker connecting xprt %p via %s to "
+                               "%s (port %s)\n", xprt,
+                       xprt->address_strings[RPC_DISPLAY_PROTO],
+                       xprt->address_strings[RPC_DISPLAY_ADDR],
+                       xprt->address_strings[RPC_DISPLAY_PORT]);
 
        status = xs_tcp_finish_connecting(xprt, sock);
        dprintk("RPC:       %p connect status %d connected %d sock state %d\n",
                        xprt, -status, xprt_connected(xprt),
                        sock->sk->sk_state);
        switch (status) {
+       default:
+               printk("%s: connect returned unhandled error %d\n",
+                       __func__, status);
+       case -EADDRNOTAVAIL:
+               /* We're probably in TIME_WAIT. Get rid of existing socket,
+                * and retry
+                */
+               set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
+               xprt_force_disconnect(xprt);
+               break;
        case -ECONNREFUSED:
        case -ECONNRESET:
        case -ENETUNREACH:
@@ -1793,10 +1927,6 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
                xprt_clear_connecting(xprt);
                return;
        }
-       /* get rid of existing socket, and retry */
-       xs_tcp_shutdown(xprt);
-       printk("%s: connect returned unhandled error %d\n",
-                       __func__, status);
 out_eagain:
        status = -EAGAIN;
 out:
@@ -1903,7 +2033,7 @@ static void xs_connect(struct rpc_task *task)
        if (xprt_test_and_set_connecting(xprt))
                return;
 
-       if (transport->sock != NULL) {
+       if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) {
                dprintk("RPC:       xs_connect delayed xprt %p for %lu "
                                "seconds\n",
                                xprt, xprt->reestablish_timeout / HZ);
@@ -1911,6 +2041,8 @@ static void xs_connect(struct rpc_task *task)
                                   &transport->connect_worker,
                                   xprt->reestablish_timeout);
                xprt->reestablish_timeout <<= 1;
+               if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
+                       xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
                if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
                        xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
        } else {
@@ -1941,7 +2073,7 @@ static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 
        seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %Lu %Lu\n",
-                       transport->port,
+                       transport->srcport,
                        xprt->stat.bind_count,
                        xprt->stat.sends,
                        xprt->stat.recvs,
@@ -1965,7 +2097,7 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
                idle_time = (long)(jiffies - xprt->last_used) / HZ;
 
        seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu\n",
-                       transport->port,
+                       transport->srcport,
                        xprt->stat.bind_count,
                        xprt->stat.connect_count,
                        xprt->stat.connect_time,
@@ -1977,6 +2109,134 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
                        xprt->stat.bklog_u);
 }
 
+/*
+ * Allocate a single page to serve as the scratch buffer for a backchannel
+ * request. A whole page is used instead of a kmalloc (as rpc_malloc does)
+ * because we want to use the server side send routines.
+ *
+ * The struct rpc_buffer header sits at the start of the page and the caller
+ * is handed a pointer to its data member. @task is not used here. BUGs if
+ * @size does not fit in the page behind the header; returns NULL if no page
+ * could be allocated.
+ */
+void *bc_malloc(struct rpc_task *task, size_t size)
+{
+       struct rpc_buffer *hdr;
+       struct page *pg;
+
+       BUG_ON(size > PAGE_SIZE - sizeof(struct rpc_buffer));
+
+       pg = alloc_page(GFP_KERNEL);
+       if (pg == NULL)
+               return NULL;
+
+       hdr = page_address(pg);
+       hdr->len = PAGE_SIZE;
+       return hdr->data;
+}
+
+/*
+ * Free the page allocated by bc_malloc. @buffer is the data pointer that
+ * bc_malloc returned; the enclosing struct rpc_buffer is recovered from it
+ * and its page is released. A NULL @buffer is ignored.
+ */
+void bc_free(void *buffer)
+{
+       if (buffer != NULL) {
+               struct rpc_buffer *hdr =
+                       container_of(buffer, struct rpc_buffer, data);
+
+               free_page((unsigned long)hdr);
+       }
+}
+
+/*
+ * Use the svc_sock to send the callback. Must be called with svsk->sk_mutex
+ * held. Borrows heavily from svc_tcp_sendto and xs_tcp_send_request.
+ *
+ * NOTE(review): the caller in this file, bc_send_request, takes the
+ * svc_xprt's xpt_mutex rather than a lock named sk_mutex - confirm which
+ * lock the sentence above is meant to describe.
+ *
+ * Returns the value handed back by svc_send_common when it equals the
+ * length of the send buffer; any other result is logged and reported as
+ * -EAGAIN.
+ */
+static int bc_sendto(struct rpc_rqst *req)
+{
+       int len;
+       struct xdr_buf *xbufp = &req->rq_snd_buf;
+       struct rpc_xprt *xprt = req->rq_xprt;
+       struct sock_xprt *transport =
+                               container_of(xprt, struct sock_xprt, xprt);
+       struct socket *sock = transport->sock;
+       unsigned long headoff;
+       unsigned long tailoff;
+
+       /*
+        * Set up the rpc header and record marker stuff
+        */
+       xs_encode_tcp_record_marker(xbufp);
+
+       /* Offsets of the head and tail buffers within their pages */
+       tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK;
+       headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK;
+       len = svc_send_common(sock, xbufp,
+                             virt_to_page(xbufp->head[0].iov_base), headoff,
+                             xbufp->tail[0].iov_base, tailoff);
+
+       /* Anything other than the full buffer length counts as a failure */
+       if (len != xbufp->len) {
+               printk(KERN_NOTICE "Error sending entire callback!\n");
+               len = -EAGAIN;
+       }
+
+       return len;
+}
+
+/*
+ * The send routine. Borrows from svc_send.
+ *
+ * Sends the request attached to @task over the svc_xprt stored in the rpc
+ * transport's bc_xprt field, serialised by that svc_xprt's xpt_mutex.
+ *
+ * Returns 0 once bc_sendto reports a positive byte count, -ENOTCONN if the
+ * svc_xprt is flagged XPT_DEAD, and -EAGAIN if the mutex could not be taken
+ * or bc_sendto failed.
+ */
+static int bc_send_request(struct rpc_task *task)
+{
+       struct rpc_rqst *req = task->tk_rqstp;
+       struct svc_xprt *xprt;
+       struct svc_sock         *svsk;
+       /*
+        * Must be signed: it carries negative errnos as well as the byte
+        * count. As an unsigned value every error would compare greater
+        * than zero below and be turned into a successful return of 0.
+        */
+       int                     len;
+
+       dprintk("sending request with xid: %08x\n", ntohl(req->rq_xid));
+       /*
+        * Get the server socket associated with this callback xprt
+        */
+       xprt = req->rq_xprt->bc_xprt;
+       svsk = container_of(xprt, struct svc_sock, sk_xprt);
+
+       /*
+        * Grab the mutex to serialize data as the connection is shared
+        * with the fore channel. If it is busy, queue the task on
+        * xpt_bc_pending and try once more; a second failure leaves the
+        * task queued and reports -EAGAIN, a success takes it off the queue
+        * again.
+        */
+       if (!mutex_trylock(&xprt->xpt_mutex)) {
+               rpc_sleep_on(&xprt->xpt_bc_pending, task, NULL);
+               if (!mutex_trylock(&xprt->xpt_mutex))
+                       return -EAGAIN;
+               rpc_wake_up_queued_task(&xprt->xpt_bc_pending, task);
+       }
+       if (test_bit(XPT_DEAD, &xprt->xpt_flags))
+               len = -ENOTCONN;
+       else
+               len = bc_sendto(req);
+       mutex_unlock(&xprt->xpt_mutex);
+
+       /* Callers only distinguish success (0) from a negative errno */
+       if (len > 0)
+               len = 0;
+
+       return len;
+}
+
+/*
+ * The close routine. Since this is client initiated, we do nothing:
+ * the body is intentionally empty.
+ */
+static void bc_close(struct rpc_xprt *xprt)
+{
+}
+
+/*
+ * The xprt destroy routine. Again, because this connection is client
+ * initiated, we do nothing: the body is intentionally empty.
+ */
+static void bc_destroy(struct rpc_xprt *xprt)
+{
+}
+
 static struct rpc_xprt_ops xs_udp_ops = {
        .set_buffer_size        = xs_udp_set_buffer_size,
        .reserve_xprt           = xprt_reserve_xprt_cong,
@@ -2005,11 +2265,30 @@ static struct rpc_xprt_ops xs_tcp_ops = {
        .buf_free               = rpc_free,
        .send_request           = xs_tcp_send_request,
        .set_retrans_timeout    = xprt_set_retrans_timeout_def,
-       .close                  = xs_tcp_shutdown,
+#if defined(CONFIG_NFS_V4_1)
+       .release_request        = bc_release_request,
+#endif /* CONFIG_NFS_V4_1 */
+       .close                  = xs_tcp_close,
        .destroy                = xs_destroy,
        .print_stats            = xs_tcp_print_stats,
 };
 
+/*
+ * The rpc_xprt_ops for the server backchannel.
+ *
+ * Buffer handling, sending, close and destroy go to the bc_* routines in
+ * this file; reservation, release and the retransmit timeout use the
+ * generic xprt_* helpers, and statistics reuse the TCP printer.
+ */
+
+static struct rpc_xprt_ops bc_tcp_ops = {
+       .reserve_xprt           = xprt_reserve_xprt,
+       .release_xprt           = xprt_release_xprt,
+       .buf_alloc              = bc_malloc,
+       .buf_free               = bc_free,
+       .send_request           = bc_send_request,
+       .set_retrans_timeout    = xprt_set_retrans_timeout_def,
+       .close                  = bc_close,
+       .destroy                = bc_destroy,
+       .print_stats            = xs_tcp_print_stats,
+};
+
 static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
                                      unsigned int slot_table_size)
 {
@@ -2041,7 +2320,7 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
        memcpy(&xprt->addr, args->dstaddr, args->addrlen);
        xprt->addrlen = args->addrlen;
        if (args->srcaddr)
-               memcpy(&new->addr, args->srcaddr, args->addrlen);
+               memcpy(&new->srcaddr, args->srcaddr, args->addrlen);
 
        return xprt;
 }
@@ -2090,7 +2369,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 
                INIT_DELAYED_WORK(&transport->connect_worker,
                                        xs_udp_connect_worker4);
-               xs_format_ipv4_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP);
+               xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP);
                break;
        case AF_INET6:
                if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
@@ -2098,15 +2377,22 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 
                INIT_DELAYED_WORK(&transport->connect_worker,
                                        xs_udp_connect_worker6);
-               xs_format_ipv6_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
+               xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
                break;
        default:
                kfree(xprt);
                return ERR_PTR(-EAFNOSUPPORT);
        }
 
-       dprintk("RPC:       set up transport to address %s\n",
-                       xprt->address_strings[RPC_DISPLAY_ALL]);
+       if (xprt_bound(xprt))
+               dprintk("RPC:       set up xprt to %s (port %s) via %s\n",
+                               xprt->address_strings[RPC_DISPLAY_ADDR],
+                               xprt->address_strings[RPC_DISPLAY_PORT],
+                               xprt->address_strings[RPC_DISPLAY_PROTO]);
+       else
+               dprintk("RPC:       set up xprt to %s (autobind) via %s\n",
+                               xprt->address_strings[RPC_DISPLAY_ADDR],
+                               xprt->address_strings[RPC_DISPLAY_PROTO]);
 
        if (try_module_get(THIS_MODULE))
                return xprt;
@@ -2155,23 +2441,33 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
                if (((struct sockaddr_in *)addr)->sin_port != htons(0))
                        xprt_set_bound(xprt);
 
-               INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4);
-               xs_format_ipv4_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
+               INIT_DELAYED_WORK(&transport->connect_worker,
+                                       xs_tcp_connect_worker4);
+               xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
                break;
        case AF_INET6:
                if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
                        xprt_set_bound(xprt);
 
-               INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6);
-               xs_format_ipv6_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
+               INIT_DELAYED_WORK(&transport->connect_worker,
+                                       xs_tcp_connect_worker6);
+               xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
                break;
        default:
                kfree(xprt);
                return ERR_PTR(-EAFNOSUPPORT);
        }
 
-       dprintk("RPC:       set up transport to address %s\n",
-                       xprt->address_strings[RPC_DISPLAY_ALL]);
+       if (xprt_bound(xprt))
+               dprintk("RPC:       set up xprt to %s (port %s) via %s\n",
+                               xprt->address_strings[RPC_DISPLAY_ADDR],
+                               xprt->address_strings[RPC_DISPLAY_PORT],
+                               xprt->address_strings[RPC_DISPLAY_PROTO]);
+       else
+               dprintk("RPC:       set up xprt to %s (autobind) via %s\n",
+                               xprt->address_strings[RPC_DISPLAY_ADDR],
+                               xprt->address_strings[RPC_DISPLAY_PROTO]);
+
 
        if (try_module_get(THIS_MODULE))
                return xprt;
@@ -2181,11 +2477,93 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
        return ERR_PTR(-EINVAL);
 }
 
+/**
+ * xs_setup_bc_tcp - Set up transport to use a TCP backchannel socket
+ * @args: rpc transport creation arguments
+ *
+ * Builds an rpc_xprt that sends over the socket of the svc_xprt named by
+ * @args->bc_xprt instead of opening a connection of its own.
+ *
+ * Returns the new transport, or an ERR_PTR: -EINVAL if @args carries no
+ * bc_xprt or the module reference cannot be taken, -EAFNOSUPPORT for an
+ * address family other than AF_INET/AF_INET6, or whatever error
+ * xs_setup_xprt() reported.
+ */
+static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
+{
+       struct sockaddr *addr = args->dstaddr;
+       struct rpc_xprt *xprt;
+       struct sock_xprt *transport;
+       struct svc_sock *bc_sock;
+       int err;
+
+       /* Without a server transport there is no socket to borrow */
+       if (!args->bc_xprt)
+               return ERR_PTR(-EINVAL);
+
+       xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
+       if (IS_ERR(xprt))
+               return xprt;
+       transport = container_of(xprt, struct sock_xprt, xprt);
+
+       xprt->prot = IPPROTO_TCP;
+       xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
+       xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
+       xprt->timeout = &xs_tcp_default_timeout;
+
+       /* backchannel */
+       xprt_set_bound(xprt);
+       xprt->bind_timeout = 0;
+       xprt->connect_timeout = 0;
+       xprt->reestablish_timeout = 0;
+       xprt->idle_timeout = 0;
+
+       /*
+        * The backchannel uses the same socket connection as the
+        * forechannel
+        */
+       xprt->bc_xprt = args->bc_xprt;
+       bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt);
+       bc_sock->sk_bc_xprt = xprt;
+       transport->sock = bc_sock->sk_sock;
+       transport->inet = bc_sock->sk_sk;
+
+       xprt->ops = &bc_tcp_ops;
+
+       switch (addr->sa_family) {
+       case AF_INET:
+               xs_format_peer_addresses(xprt, "tcp",
+                                        RPCBIND_NETID_TCP);
+               break;
+       case AF_INET6:
+               xs_format_peer_addresses(xprt, "tcp",
+                                  RPCBIND_NETID_TCP6);
+               break;
+       default:
+               err = -EAFNOSUPPORT;
+               goto out_err;
+       }
+
+       if (xprt_bound(xprt))
+               dprintk("RPC:       set up xprt to %s (port %s) via %s\n",
+                               xprt->address_strings[RPC_DISPLAY_ADDR],
+                               xprt->address_strings[RPC_DISPLAY_PORT],
+                               xprt->address_strings[RPC_DISPLAY_PROTO]);
+       else
+               dprintk("RPC:       set up xprt to %s (autobind) via %s\n",
+                               xprt->address_strings[RPC_DISPLAY_ADDR],
+                               xprt->address_strings[RPC_DISPLAY_PROTO]);
+
+       /*
+        * Since we don't want connections for the backchannel, we set
+        * the xprt status to connected
+        */
+       xprt_set_connected(xprt);
+
+       if (try_module_get(THIS_MODULE))
+               return xprt;
+       err = -EINVAL;
+
+out_err:
+       /*
+        * The svc_sock was already pointed at this transport above; clear
+        * that pointer before the memory goes away, then release the slot
+        * table together with the transport.
+        */
+       bc_sock->sk_bc_xprt = NULL;
+       kfree(xprt->slot);
+       kfree(xprt);
+       return ERR_PTR(err);
+}
+
 static struct xprt_class       xs_udp_transport = {
        .list           = LIST_HEAD_INIT(xs_udp_transport.list),
        .name           = "udp",
        .owner          = THIS_MODULE,
-       .ident          = IPPROTO_UDP,
+       .ident          = XPRT_TRANSPORT_UDP,
        .setup          = xs_setup_udp,
 };
 
@@ -2193,10 +2571,18 @@ static struct xprt_class        xs_tcp_transport = {
        .list           = LIST_HEAD_INIT(xs_tcp_transport.list),
        .name           = "tcp",
        .owner          = THIS_MODULE,
-       .ident          = IPPROTO_TCP,
+       .ident          = XPRT_TRANSPORT_TCP,
        .setup          = xs_setup_tcp,
 };
 
+/*
+ * Transport class for the backchannel: transports identified by
+ * XPRT_TRANSPORT_BC_TCP are created by xs_setup_bc_tcp.
+ */
+static struct xprt_class       xs_bc_tcp_transport = {
+       .list           = LIST_HEAD_INIT(xs_bc_tcp_transport.list),
+       .name           = "tcp NFSv4.1 backchannel",
+       .owner          = THIS_MODULE,
+       .ident          = XPRT_TRANSPORT_BC_TCP,
+       .setup          = xs_setup_bc_tcp,
+};
+
 /**
  * init_socket_xprt - set up xprtsock's sysctls, register with RPC client
  *
@@ -2210,6 +2596,7 @@ int init_socket_xprt(void)
 
        xprt_register_transport(&xs_udp_transport);
        xprt_register_transport(&xs_tcp_transport);
+       xprt_register_transport(&xs_bc_tcp_transport);
 
        return 0;
 }
@@ -2229,4 +2616,57 @@ void cleanup_socket_xprt(void)
 
        xprt_unregister_transport(&xs_udp_transport);
        xprt_unregister_transport(&xs_tcp_transport);
+       xprt_unregister_transport(&xs_bc_tcp_transport);
 }
+
+/*
+ * Parse @val as an unsigned number and store it in the unsigned int that
+ * @kp->arg points to, provided it lies within [@min, @max].
+ *
+ * Returns 0 on success, or -EINVAL when @val is NULL, cannot be parsed, or
+ * is out of range; the stored value is left untouched in those cases.
+ */
+static int param_set_uint_minmax(const char *val, struct kernel_param *kp,
+               unsigned int min, unsigned int max)
+{
+       unsigned long num;
+       int ret;
+
+       if (!val)
+               return -EINVAL;
+       ret = strict_strtoul(val, 0, &num);
+       /*
+        * Treat every parse failure alike, whatever its error code: num is
+        * only meaningful when the conversion succeeded.
+        */
+       if (ret != 0 || num < min || num > max)
+               return -EINVAL;
+       *((unsigned int *)kp->arg) = num;
+       return 0;
+}
+
+/*
+ * Setter for the "portnr" parameter type: accepts values between
+ * RPC_MIN_RESVPORT and RPC_MAX_RESVPORT inclusive.
+ */
+static int param_set_portnr(const char *val, struct kernel_param *kp)
+{
+       const unsigned int lo = RPC_MIN_RESVPORT;
+       const unsigned int hi = RPC_MAX_RESVPORT;
+
+       return param_set_uint_minmax(val, kp, lo, hi);
+}
+
+/* Getter for the "portnr" parameter type; defers to param_get_uint. */
+static int param_get_portnr(char *buffer, struct kernel_param *kp)
+{
+       int ret = param_get_uint(buffer, kp);
+
+       return ret;
+}
+/*
+ * Type check hook for "portnr" parameters (backing variable is an
+ * unsigned int), followed by the two reserved-port module parameters that
+ * use the type. Both are registered with mode 0644.
+ */
+#define param_check_portnr(name, p) \
+       __param_check(name, p, unsigned int);
+
+module_param_named(min_resvport, xprt_min_resvport, portnr, 0644);
+module_param_named(max_resvport, xprt_max_resvport, portnr, 0644);
+
+/*
+ * Setter for the "slot_table_size" parameter type: accepts values between
+ * RPC_MIN_SLOT_TABLE and RPC_MAX_SLOT_TABLE inclusive.
+ */
+static int param_set_slot_table_size(const char *val, struct kernel_param *kp)
+{
+       const unsigned int lo = RPC_MIN_SLOT_TABLE;
+       const unsigned int hi = RPC_MAX_SLOT_TABLE;
+
+       return param_set_uint_minmax(val, kp, lo, hi);
+}
+
+/* Getter for the "slot_table_size" parameter type; defers to param_get_uint. */
+static int param_get_slot_table_size(char *buffer, struct kernel_param *kp)
+{
+       int ret = param_get_uint(buffer, kp);
+
+       return ret;
+}
+/*
+ * Type check hook for "slot_table_size" parameters (backing variable is an
+ * unsigned int), followed by the TCP and UDP slot table module parameters
+ * that use the type. Both are registered with mode 0644.
+ */
+#define param_check_slot_table_size(name, p) \
+       __param_check(name, p, unsigned int);
+
+module_param_named(tcp_slot_table_entries, xprt_tcp_slot_table_entries,
+                  slot_table_size, 0644);
+module_param_named(udp_slot_table_entries, xprt_udp_slot_table_entries,
+                  slot_table_size, 0644);
+