SUNRPC: RPC metrics and RTT estimator should use same RTT value
[safe/jmp/linux-2.6] / include / linux / sunrpc / xprt.h
index 3b8b6e8..04fc342 100644 (file)
 #include <linux/uio.h>
 #include <linux/socket.h>
 #include <linux/in.h>
+#include <linux/kref.h>
 #include <linux/sunrpc/sched.h>
 #include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/msg_prot.h>
 
-extern unsigned int xprt_udp_slot_table_entries;
-extern unsigned int xprt_tcp_slot_table_entries;
+#ifdef __KERNEL__
 
 #define RPC_MIN_SLOT_TABLE     (2U)
 #define RPC_DEF_SLOT_TABLE     (16U)
 #define RPC_MAX_SLOT_TABLE     (128U)
 
 /*
- * RPC call and reply header size as number of 32bit words (verifier
- * size computed separately)
- */
-#define RPC_CALLHDRSIZE                6
-#define RPC_REPHDRSIZE         4
-
-/*
- * Parameters for choosing a free port
- */
-extern unsigned int xprt_min_resvport;
-extern unsigned int xprt_max_resvport;
-
-#define RPC_MIN_RESVPORT       (1U)
-#define RPC_MAX_RESVPORT       (65535U)
-#define RPC_DEF_MIN_RESVPORT   (650U)
-#define RPC_DEF_MAX_RESVPORT   (1023U)
-
-/*
  * This describes a timeout strategy
  */
 struct rpc_timeout {
@@ -51,8 +34,19 @@ struct rpc_timeout {
        unsigned char           to_exponential;
 };
 
+enum rpc_display_format_t {    /* NOTE(review): presumably the indexes of rpc_xprt.address_strings[] -- confirm */
+       RPC_DISPLAY_ADDR = 0,   /* first value is pinned to 0; the rest count up from it */
+       RPC_DISPLAY_PORT,
+       RPC_DISPLAY_PROTO,
+       RPC_DISPLAY_HEX_ADDR,
+       RPC_DISPLAY_HEX_PORT,
+       RPC_DISPLAY_NETID,
+       RPC_DISPLAY_MAX,        /* number of formats above; sizes rpc_xprt.address_strings[] */
+};
+
 struct rpc_task;
 struct rpc_xprt;
+struct seq_file;
 
 /*
  * This describes a complete RPC request
@@ -69,9 +63,10 @@ struct rpc_rqst {
         * This is the private part
         */
        struct rpc_task *       rq_task;        /* RPC task data */
-       __u32                   rq_xid;         /* request XID */
+       __be32                  rq_xid;         /* request XID */
        int                     rq_cong;        /* has incremented xprt->cong */
-       int                     rq_received;    /* receive completed */
+       int                     rq_reply_bytes_recvd;   /* number of reply */
+                                                       /* bytes received */
        u32                     rq_seqno;       /* gss seq no. used on req. */
        int                     rq_enc_pages_num;
        struct page             **rq_enc_pages; /* scratch pages for use by
@@ -79,25 +74,34 @@ struct rpc_rqst {
        void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */
        struct list_head        rq_list;
 
+       __u32 *                 rq_buffer;      /* XDR encode buffer */
+       size_t                  rq_callsize,
+                               rq_rcvsize;
+
        struct xdr_buf          rq_private_buf;         /* The receive buffer
                                                         * used in the softirq.
                                                         */
        unsigned long           rq_majortimeo;  /* major timeout alarm */
        unsigned long           rq_timeout;     /* Current timeout value */
        unsigned int            rq_retries;     /* # of retries */
-       /*
-        * For authentication (e.g. auth_des)
-        */
-       u32                     rq_creddata[2];
+       unsigned int            rq_connect_cookie;
+                                               /* A cookie used to track the
+                                                  state of the transport
+                                                  connection */
        
        /*
         * Partial send handling
         */
-       
        u32                     rq_bytes_sent;  /* Bytes we have sent */
 
        unsigned long           rq_xtime;       /* when transmitted */
        int                     rq_ntrans;
+
+#if defined(CONFIG_NFS_V4_1)
+       struct list_head        rq_bc_list;     /* Callback service list */
+       unsigned long           rq_bc_pa_state; /* Backchannel prealloc state */
+       struct list_head        rq_bc_pa_list;  /* Backchannel prealloc list */
+#endif /* CONFIG_NFS_V4_1 */
 };
 #define rq_svec                        rq_snd_buf.head
 #define rq_slen                        rq_snd_buf.len
@@ -106,35 +110,55 @@ struct rpc_xprt_ops {
        void            (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize);
        int             (*reserve_xprt)(struct rpc_task *task);
        void            (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
+       void            (*rpcbind)(struct rpc_task *task);
+       void            (*set_port)(struct rpc_xprt *xprt, unsigned short port);
        void            (*connect)(struct rpc_task *task);
+       void *          (*buf_alloc)(struct rpc_task *task, size_t size);
+       void            (*buf_free)(void *buffer);
        int             (*send_request)(struct rpc_task *task);
        void            (*set_retrans_timeout)(struct rpc_task *task);
        void            (*timer)(struct rpc_task *task);
        void            (*release_request)(struct rpc_task *task);
        void            (*close)(struct rpc_xprt *xprt);
        void            (*destroy)(struct rpc_xprt *xprt);
+       void            (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq);
+};
+
+/*
+ * RPC transport identifiers
+ *
+ * To preserve compatibility with the historical use of raw IP protocol
+ * id's for transport selection, UDP and TCP identifiers are specified
+ * with the previous values. No such restriction exists for new transports,
+ * except that they may not collide with these values (17 and 6,
+ * respectively).
+ */
+#define XPRT_TRANSPORT_BC       (1 << 31)      /* flag in bit 31, OR-ed into an identifier; NOTE(review): 1 << 31 shifts into the sign bit of int */
+enum xprt_transports {         /* transport identifiers; see the "XPRT_TRANSPORT identifier" ident fields */
+       XPRT_TRANSPORT_UDP      = IPPROTO_UDP,
+       XPRT_TRANSPORT_TCP      = IPPROTO_TCP,
+       XPRT_TRANSPORT_BC_TCP   = IPPROTO_TCP | XPRT_TRANSPORT_BC,     /* TCP identifier with the XPRT_TRANSPORT_BC flag set */
+       XPRT_TRANSPORT_RDMA     = 256
 };
 
 struct rpc_xprt {
+       struct kref             kref;           /* Reference count */
        struct rpc_xprt_ops *   ops;            /* transport methods */
-       struct socket *         sock;           /* BSD socket layer */
-       struct sock *           inet;           /* INET layer */
 
-       struct rpc_timeout      timeout;        /* timeout parms */
-       struct sockaddr_in      addr;           /* server address */
+       const struct rpc_timeout *timeout;      /* timeout parms */
+       struct sockaddr_storage addr;           /* server address */
+       size_t                  addrlen;        /* size of server address */
        int                     prot;           /* IP protocol */
 
        unsigned long           cong;           /* current congestion */
        unsigned long           cwnd;           /* congestion window */
 
-       size_t                  rcvsize,        /* transport rcv buffer size */
-                               sndsize;        /* transport send buffer size */
-
        size_t                  max_payload;    /* largest RPC payload size,
                                                   in bytes */
        unsigned int            tsh_size;       /* size of transport specific
                                                   header */
 
+       struct rpc_wait_queue   binding;        /* requests waiting on rpcbind */
        struct rpc_wait_queue   sending;        /* requests waiting to send */
        struct rpc_wait_queue   resend;         /* requests waiting to resend */
        struct rpc_wait_queue   pending;        /* requests in flight */
@@ -145,29 +169,16 @@ struct rpc_xprt {
        unsigned long           state;          /* transport state */
        unsigned char           shutdown   : 1, /* being shut down */
                                resvport   : 1; /* use a reserved port */
+       unsigned int            bind_index;     /* bind function index */
 
        /*
-        * XID
-        */
-       __u32                   xid;            /* Next XID value to use */
-
-       /*
-        * State of TCP reply receive stuff
-        */
-       u32                     tcp_recm,       /* Fragment header */
-                               tcp_xid,        /* Current XID */
-                               tcp_reclen,     /* fragment length */
-                               tcp_offset;     /* fragment offset */
-       unsigned long           tcp_copied,     /* copied to request */
-                               tcp_flags;
-       /*
         * Connection of transports
         */
-       unsigned long           connect_timeout,
-                               bind_timeout,
+       unsigned long           bind_timeout,
                                reestablish_timeout;
-       struct work_struct      connect_worker;
-       unsigned short          port;
+       unsigned int            connect_cookie; /* A cookie that gets bumped
+                                                  every time the transport
+                                                  is reconnected */
 
        /*
         * Disconnection of idle transports
@@ -182,46 +193,91 @@ struct rpc_xprt {
         */
        spinlock_t              transport_lock; /* lock transport info */
        spinlock_t              reserve_lock;   /* lock slot table */
+       u32                     xid;            /* Next XID value to use */
        struct rpc_task *       snd_task;       /* Task blocked in send */
-
+       struct svc_xprt         *bc_xprt;       /* NFSv4.1 backchannel */
+#if defined(CONFIG_NFS_V4_1)
+       struct svc_serv         *bc_serv;       /* The RPC service which will */
+                                               /* process the callback */
+       unsigned int            bc_alloc_count; /* Total number of preallocs */
+       spinlock_t              bc_pa_lock;     /* Protects the preallocated
+                                                * items */
+       struct list_head        bc_pa_list;     /* List of preallocated
+                                                * backchannel rpc_rqst's */
+#endif /* CONFIG_NFS_V4_1 */
        struct list_head        recv;
 
+       struct {
+               unsigned long           bind_count,     /* total number of binds */
+                                       connect_count,  /* total number of connects */
+                                       connect_start,  /* connect start timestamp */
+                                       connect_time,   /* jiffies waiting for connect */
+                                       sends,          /* how many complete requests */
+                                       recvs,          /* how many complete requests */
+                                       bad_xids;       /* lookup_rqst didn't find XID */
 
-       void                    (*old_data_ready)(struct sock *, int);
-       void                    (*old_state_change)(struct sock *);
-       void                    (*old_write_space)(struct sock *);
-};
-
-#define XPRT_LAST_FRAG         (1 << 0)
-#define XPRT_COPY_RECM         (1 << 1)
-#define XPRT_COPY_XID          (1 << 2)
-#define XPRT_COPY_DATA         (1 << 3)
+               unsigned long long      req_u,          /* average requests on the wire */
+                                       bklog_u;        /* backlog queue utilization */
+       } stat;
 
-#ifdef __KERNEL__
+       const char              *address_strings[RPC_DISPLAY_MAX];
+};
 
+#if defined(CONFIG_NFS_V4_1)
 /*
- * Transport operations used by ULPs
+ * Backchannel flags
  */
-struct rpc_xprt *      xprt_create_proto(int proto, struct sockaddr_in *addr, struct rpc_timeout *to);
-void                   xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr);
+#define        RPC_BC_PA_IN_USE        0x0001          /* Preallocated backchannel */
+                                               /* buffer in use */
+#endif /* CONFIG_NFS_V4_1 */
+
+#if defined(CONFIG_NFS_V4_1)
+static inline int bc_prealloc(struct rpc_rqst *req)    /* tests the RPC_BC_PA_IN_USE bit of req->rq_bc_pa_state */
+{
+       return test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);        /* RPC_BC_PA_IN_USE (0x0001) is passed as a bit number, not as a mask */
+}
+#else /* !CONFIG_NFS_V4_1: rq_bc_pa_state is not part of struct rpc_rqst */
+static inline int bc_prealloc(struct rpc_rqst *req)    /* stub: req is not examined */
+{
+       return 0;       /* always 0 in this configuration */
+}
+#endif /* CONFIG_NFS_V4_1 */
+
+struct xprt_create {   /* argument bundle taken by xprt_create_transport() and by xprt_class.setup() */
+       int                     ident;          /* XPRT_TRANSPORT identifier (see enum xprt_transports) */
+       struct sockaddr *       srcaddr;        /* optional local address */
+       struct sockaddr *       dstaddr;        /* remote peer address */
+       size_t                  addrlen;        /* NOTE(review): presumably the size of *dstaddr in bytes -- confirm */
+       struct svc_xprt         *bc_xprt;       /* NFSv4.1 backchannel */
+};
+
+struct xprt_class {    /* one transport type, as passed to xprt_register_transport() / xprt_unregister_transport() */
+       struct list_head        list;           /* NOTE(review): presumably links the registered classes -- confirm */
+       int                     ident;          /* XPRT_TRANSPORT identifier (see enum xprt_transports) */
+       struct rpc_xprt *       (*setup)(struct xprt_create *); /* takes creation arguments, returns a struct rpc_xprt * */
+       struct module           *owner;         /* NOTE(review): presumably the module providing this class -- confirm */
+       char                    name[32];       /* fixed 32-byte name buffer */
+};
 
 /*
  * Generic internal transport functions
  */
+struct rpc_xprt                *xprt_create_transport(struct xprt_create *args);
 void                   xprt_connect(struct rpc_task *task);
 void                   xprt_reserve(struct rpc_task *task);
 int                    xprt_reserve_xprt(struct rpc_task *task);
 int                    xprt_reserve_xprt_cong(struct rpc_task *task);
 int                    xprt_prepare_transmit(struct rpc_task *task);
 void                   xprt_transmit(struct rpc_task *task);
-void                   xprt_abort_transmit(struct rpc_task *task);
+void                   xprt_end_transmit(struct rpc_task *task);
 int                    xprt_adjust_timeout(struct rpc_rqst *req);
 void                   xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
 void                   xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
 void                   xprt_release(struct rpc_task *task);
-int                    xprt_destroy(struct rpc_xprt *xprt);
+struct rpc_xprt *      xprt_get(struct rpc_xprt *xprt);
+void                   xprt_put(struct rpc_xprt *xprt);
 
-static inline u32 *xprt_skip_transport_header(struct rpc_xprt *xprt, u32 *p)
+static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p)     /* returns p advanced by xprt->tsh_size __be32 elements (not bytes) */
 {
        return p + xprt->tsh_size;
 }
@@ -229,23 +285,21 @@ static inline u32 *xprt_skip_transport_header(struct rpc_xprt *xprt, u32 *p)
 /*
  * Transport switch helper functions
  */
+int                    xprt_register_transport(struct xprt_class *type);
+int                    xprt_unregister_transport(struct xprt_class *type);
+int                    xprt_load_transport(const char *);
 void                   xprt_set_retrans_timeout_def(struct rpc_task *task);
 void                   xprt_set_retrans_timeout_rtt(struct rpc_task *task);
 void                   xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);
-void                   xprt_wait_for_buffer_space(struct rpc_task *task);
+void                   xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action);
 void                   xprt_write_space(struct rpc_xprt *xprt);
-void                   xprt_update_rtt(struct rpc_task *task);
 void                   xprt_adjust_cwnd(struct rpc_task *task, int result);
-struct rpc_rqst *      xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid);
+struct rpc_rqst *      xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid);
 void                   xprt_complete_rqst(struct rpc_task *task, int copied);
 void                   xprt_release_rqst_cong(struct rpc_task *task);
-void                   xprt_disconnect(struct rpc_xprt *xprt);
-
-/*
- * Socket transport setup operations
- */
-int                    xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to);
-int                    xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to);
+void                   xprt_disconnect_done(struct rpc_xprt *xprt);
+void                   xprt_force_disconnect(struct rpc_xprt *xprt);
+void                   xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
 
 /*
  * Reserved bit positions in xprt->state
@@ -253,6 +307,12 @@ int                        xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to);
 #define XPRT_LOCKED            (0)
 #define XPRT_CONNECTED         (1)
 #define XPRT_CONNECTING                (2)
+#define XPRT_CLOSE_WAIT                (3)
+#define XPRT_BOUND             (4)
+#define XPRT_BINDING           (5)
+#define XPRT_CLOSING           (6)
+#define XPRT_CONNECTION_ABORT  (7)
+#define XPRT_CONNECTION_CLOSE  (8)
 
 static inline void xprt_set_connected(struct rpc_xprt *xprt)
 {
@@ -296,6 +356,33 @@ static inline int xprt_test_and_set_connecting(struct rpc_xprt *xprt)
        return test_and_set_bit(XPRT_CONNECTING, &xprt->state);
 }
 
+static inline void xprt_set_bound(struct rpc_xprt *xprt)       /* sets the XPRT_BOUND bit in xprt->state */
+{
+       test_and_set_bit(XPRT_BOUND, &xprt->state);     /* the returned previous bit value is ignored */
+}
+
+static inline int xprt_bound(struct rpc_xprt *xprt)    /* reports whether XPRT_BOUND is set in xprt->state */
+{
+       return test_bit(XPRT_BOUND, &xprt->state);      /* read-only test; state is not modified */
+}
+
+static inline void xprt_clear_bound(struct rpc_xprt *xprt)     /* clears the XPRT_BOUND bit in xprt->state */
+{
+       clear_bit(XPRT_BOUND, &xprt->state);    /* no explicit barriers here, unlike xprt_clear_binding() */
+}
+
+static inline void xprt_clear_binding(struct rpc_xprt *xprt)   /* clears XPRT_BINDING in xprt->state, bracketed by memory barriers */
+{
+       smp_mb__before_clear_bit();     /* NOTE(review): presumably orders earlier accesses before the clear -- confirm */
+       clear_bit(XPRT_BINDING, &xprt->state);
+       smp_mb__after_clear_bit();      /* NOTE(review): presumably orders the clear before later accesses -- confirm */
+}
+
+static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt)     /* sets XPRT_BINDING in xprt->state and reports its prior value */
+{
+       return test_and_set_bit(XPRT_BINDING, &xprt->state);    /* nonzero means the bit was already set before this call */
+}
+
 #endif /* __KERNEL__*/
 
 #endif /* _LINUX_SUNRPC_XPRT_H */