[ARM] Kirkwood: Add the watchdog timer as a platform device.
[safe/jmp/linux-2.6] / net / sunrpc / svc_xprt.c
index cac3f82..c200d92 100644 (file)
@@ -6,35 +6,16 @@
 
 #include <linux/sched.h>
 #include <linux/errno.h>
-#include <linux/fcntl.h>
-#include <linux/net.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/udp.h>
-#include <linux/tcp.h>
-#include <linux/unistd.h>
-#include <linux/slab.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/file.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <net/sock.h>
-#include <net/checksum.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
-#include <net/tcp_states.h>
-#include <linux/uaccess.h>
-#include <asm/ioctls.h>
-
-#include <linux/sunrpc/types.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/svc_xprt.h>
 
 #define RPCDBG_FACILITY        RPCDBG_SVCXPRT
 
+#define SVC_MAX_WAKING 5
+
 static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
 static int svc_deferred_recv(struct svc_rqst *rqstp);
 static struct cache_deferred_req *svc_defer(struct cache_req *req);
@@ -180,15 +161,47 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
 }
 EXPORT_SYMBOL_GPL(svc_xprt_init);
 
-int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
-                   int flags)
+static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
+                                        struct svc_serv *serv,
+                                        const int family,
+                                        const unsigned short port,
+                                        int flags)
 {
-       struct svc_xprt_class *xcl;
        struct sockaddr_in sin = {
                .sin_family             = AF_INET,
                .sin_addr.s_addr        = htonl(INADDR_ANY),
                .sin_port               = htons(port),
        };
+       struct sockaddr_in6 sin6 = {
+               .sin6_family            = AF_INET6,
+               .sin6_addr              = IN6ADDR_ANY_INIT,
+               .sin6_port              = htons(port),
+       };
+       struct sockaddr *sap;
+       size_t len;
+
+       switch (family) {       /* pick the "any" address for this family */
+       case PF_INET:
+               sap = (struct sockaddr *)&sin;
+               len = sizeof(sin);
+               break;
+       case PF_INET6:
+               sap = (struct sockaddr *)&sin6;
+               len = sizeof(sin6);
+               break;
+       default:                /* only PF_INET and PF_INET6 are handled */
+               return ERR_PTR(-EAFNOSUPPORT);
+       }
+       /* hand the wildcard address to the transport class's create method */
+       return xcl->xcl_ops->xpo_create(serv, sap, len, flags);
+}
+
+int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+                   const int family, const unsigned short port,
+                   int flags)
+{
+       struct svc_xprt_class *xcl;
+
        dprintk("svc: creating transport %s[%d]\n", xprt_name, port);
        spin_lock(&svc_xprt_class_lock);
        list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
@@ -201,9 +214,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
                        goto err;
 
                spin_unlock(&svc_xprt_class_lock);
-               newxprt = xcl->xcl_ops->
-                       xpo_create(serv, (struct sockaddr *)&sin, sizeof(sin),
-                                  flags);
+               newxprt = __svc_xpo_create(xcl, serv, family, port, flags);
                if (IS_ERR(newxprt)) {
                        module_put(xcl->xcl_owner);
                        return PTR_ERR(newxprt);
@@ -292,6 +303,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
        struct svc_pool *pool;
        struct svc_rqst *rqstp;
        int cpu;
+       int thread_avail;
 
        if (!(xprt->xpt_flags &
              ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
@@ -303,18 +315,14 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 
        spin_lock_bh(&pool->sp_lock);
 
-       if (!list_empty(&pool->sp_threads) &&
-           !list_empty(&pool->sp_sockets))
-               printk(KERN_ERR
-                      "svc_xprt_enqueue: "
-                      "threads and transports both waiting??\n");
-
        if (test_bit(XPT_DEAD, &xprt->xpt_flags)) {
                /* Don't enqueue dead transports */
                dprintk("svc: transport %p is dead, not enqueued\n", xprt);
                goto out_unlock;
        }
 
+       pool->sp_stats.packets++;
+
        /* Mark transport as busy. It will remain in this state until
         * the provider calls svc_xprt_received. We update XPT_BUSY
         * atomically because it also guards against trying to enqueue
@@ -347,7 +355,15 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
        }
 
  process:
-       if (!list_empty(&pool->sp_threads)) {
+       /* Work out whether threads are available */
+       thread_avail = !list_empty(&pool->sp_threads);  /* threads are asleep */
+       if (pool->sp_nwaking >= SVC_MAX_WAKING) {
+               /* too many threads are runnable and trying to wake up */
+               thread_avail = 0;
+               pool->sp_stats.overloads_avoided++;
+       }
+
+       if (thread_avail) {
                rqstp = list_entry(pool->sp_threads.next,
                                   struct svc_rqst,
                                   rq_list);
@@ -362,11 +378,15 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
                svc_xprt_get(xprt);
                rqstp->rq_reserved = serv->sv_max_mesg;
                atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
+               rqstp->rq_waking = 1;
+               pool->sp_nwaking++;
+               pool->sp_stats.threads_woken++;
                BUG_ON(xprt->xpt_pool != pool);
                wake_up(&rqstp->rq_wait);
        } else {
                dprintk("svc: transport %p put into queue\n", xprt);
                list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
+               pool->sp_stats.sockets_queued++;
                BUG_ON(xprt->xpt_pool != pool);
        }
 
@@ -434,7 +454,7 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
                svc_xprt_enqueue(xprt);
        }
 }
-EXPORT_SYMBOL(svc_reserve);
+EXPORT_SYMBOL_GPL(svc_reserve);
 
 static void svc_xprt_release(struct svc_rqst *rqstp)
 {
@@ -442,6 +462,9 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
 
        rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);
 
+       kfree(rqstp->rq_deferred);
+       rqstp->rq_deferred = NULL;
+
        svc_free_res_pages(rqstp);
        rqstp->rq_res.page_len = 0;
        rqstp->rq_res.page_base = 0;
@@ -492,7 +515,7 @@ void svc_wake_up(struct svc_serv *serv)
                spin_unlock_bh(&pool->sp_lock);
        }
 }
-EXPORT_SYMBOL(svc_wake_up);
+EXPORT_SYMBOL_GPL(svc_wake_up);
 
 int svc_port_is_privileged(struct sockaddr *sin)
 {
@@ -509,8 +532,10 @@ int svc_port_is_privileged(struct sockaddr *sin)
 }
 
 /*
- * Make sure that we don't have too many active connections.  If we
- * have, something must be dropped.
+ * Make sure that we don't have too many active connections. If we have,
+ * something must be dropped. It's not clear what will happen if we allow
+ * "too many" connections, but when dealing with network-facing software,
+ * we have to code defensively. Here we do that by imposing hard limits.
  *
  * There's no point in trying to do random drop here for DoS
  * prevention. The NFS clients does 1 reconnect in 15 seconds. An
@@ -519,19 +544,27 @@ int svc_port_is_privileged(struct sockaddr *sin)
  * The only somewhat efficient mechanism would be if drop old
  * connections from the same IP first. But right now we don't even
  * record the client IP in svc_sock.
+ *
+ * Single-threaded services that expect a lot of clients will probably
+ * need to set sv_maxconn to override the default value, which is based
+ * on the number of threads.
  */
 static void svc_check_conn_limits(struct svc_serv *serv)
 {
-       if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
+       unsigned int limit = serv->sv_maxconn ? serv->sv_maxconn :
+                               (serv->sv_nrthreads+3) * 20;
+
+       if (serv->sv_tmpcnt > limit) {
                struct svc_xprt *xprt = NULL;
                spin_lock_bh(&serv->sv_lock);
                if (!list_empty(&serv->sv_tempsocks)) {
                        if (net_ratelimit()) {
                                /* Try to help the admin */
                                printk(KERN_NOTICE "%s: too many open  "
-                                      "connections, consider increasing the "
-                                      "number of nfsd threads\n",
-                                      serv->sv_name);
+                                      "connections, consider increasing %s\n",
+                                      serv->sv_name, serv->sv_maxconn ?
+                                      "the max number of connections." :
+                                      "the number of threads.");
                        }
                        /*
                         * Always select the oldest connection. It's not fair,
@@ -566,6 +599,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
        int                     pages;
        struct xdr_buf          *arg;
        DECLARE_WAITQUEUE(wait, current);
+       long                    time_left;
 
        dprintk("svc: server %p waiting for data (to = %ld)\n",
                rqstp, timeout);
@@ -614,6 +648,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
                return -EINTR;
 
        spin_lock_bh(&pool->sp_lock);
+       if (rqstp->rq_waking) {
+               rqstp->rq_waking = 0;
+               pool->sp_nwaking--;
+               BUG_ON(pool->sp_nwaking < 0);
+       }
        xprt = svc_xprt_dequeue(pool);
        if (xprt) {
                rqstp->rq_xprt = xprt;
@@ -646,12 +685,14 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
                add_wait_queue(&rqstp->rq_wait, &wait);
                spin_unlock_bh(&pool->sp_lock);
 
-               schedule_timeout(timeout);
+               time_left = schedule_timeout(timeout);
 
                try_to_freeze();
 
                spin_lock_bh(&pool->sp_lock);
                remove_wait_queue(&rqstp->rq_wait, &wait);
+               if (!time_left)
+                       pool->sp_stats.threads_timedout++;
 
                xprt = rqstp->rq_xprt;
                if (!xprt) {
@@ -724,7 +765,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
                serv->sv_stats->netcnt++;
        return len;
 }
-EXPORT_SYMBOL(svc_recv);
+EXPORT_SYMBOL_GPL(svc_recv);
 
 /*
  * Drop request
@@ -734,7 +775,7 @@ void svc_drop(struct svc_rqst *rqstp)
        dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt);
        svc_xprt_release(rqstp);
 }
-EXPORT_SYMBOL(svc_drop);
+EXPORT_SYMBOL_GPL(svc_drop);
 
 /*
  * Return reply to client.
@@ -831,6 +872,11 @@ static void svc_age_temp_xprts(unsigned long closure)
 void svc_delete_xprt(struct svc_xprt *xprt)
 {
        struct svc_serv *serv = xprt->xpt_server;
+       struct svc_deferred_req *dr;
+
+       /* Only do this once */
+       if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags))
+               return;
 
        dprintk("svc: svc_delete_xprt(%p)\n", xprt);
        xprt->xpt_ops->xpo_detach(xprt);
@@ -845,12 +891,16 @@ void svc_delete_xprt(struct svc_xprt *xprt)
         * while still attached to a queue, the queue itself
         * is about to be destroyed (in svc_destroy).
         */
-       if (!test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) {
-               BUG_ON(atomic_read(&xprt->xpt_ref.refcount) < 2);
-               if (test_bit(XPT_TEMP, &xprt->xpt_flags))
-                       serv->sv_tmpcnt--;
+       if (test_bit(XPT_TEMP, &xprt->xpt_flags))
+               serv->sv_tmpcnt--;
+
+       for (dr = svc_deferred_dequeue(xprt); dr;
+            dr = svc_deferred_dequeue(xprt)) {
                svc_xprt_put(xprt);
+               kfree(dr);
        }
+
+       svc_xprt_put(xprt);
        spin_unlock_bh(&serv->sv_lock);
 }
 
@@ -896,17 +946,19 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
                container_of(dreq, struct svc_deferred_req, handle);
        struct svc_xprt *xprt = dr->xprt;
 
-       if (too_many) {
+       spin_lock(&xprt->xpt_lock);
+       set_bit(XPT_DEFERRED, &xprt->xpt_flags);
+       if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
+               spin_unlock(&xprt->xpt_lock);
+               dprintk("revisit canceled\n");
                svc_xprt_put(xprt);
                kfree(dr);
                return;
        }
        dprintk("revisit queued\n");
        dr->xprt = NULL;
-       spin_lock(&xprt->xpt_lock);
        list_add(&dr->handle.recent, &xprt->xpt_deferred);
        spin_unlock(&xprt->xpt_lock);
-       set_bit(XPT_DEFERRED, &xprt->xpt_flags);
        svc_xprt_enqueue(xprt);
        svc_xprt_put(xprt);
 }
@@ -925,7 +977,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
        struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
        struct svc_deferred_req *dr;
 
-       if (rqstp->rq_arg.page_len)
+       if (rqstp->rq_arg.page_len || !rqstp->rq_usedeferral)
                return NULL; /* if more than a page, give up FIXME */
        if (rqstp->rq_deferred) {
                dr = rqstp->rq_deferred;
@@ -1003,7 +1055,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
        return dr;
 }
 
-/*
+/**
+ * svc_find_xprt - find an RPC transport instance
+ * @serv: pointer to svc_serv to search
+ * @xcl_name: C string containing transport's class name
+ * @af: Address family of transport's local address
+ * @port: transport's IP port number
+ *
  * Return the transport instance pointer for the endpoint accepting
  * connections/peer traffic from the specified transport class,
  * address family and port.
@@ -1012,14 +1070,14 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
  * wild-card, and will result in matching the first transport in the
  * service's list that has a matching class name.
  */
-struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
-                              int af, int port)
+struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
+                              const sa_family_t af, const unsigned short port)
 {
        struct svc_xprt *xprt;
        struct svc_xprt *found = NULL;
 
        /* Sanity check the args */
-       if (!serv || !xcl_name)
+       if (serv == NULL || xcl_name == NULL)
                return found;
 
        spin_lock_bh(&serv->sv_lock);
@@ -1028,7 +1086,7 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
                        continue;
                if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family)
                        continue;
-               if (port && port != svc_xprt_local_port(xprt))
+               if (port != 0 && port != svc_xprt_local_port(xprt))
                        continue;
                found = xprt;
                svc_xprt_get(xprt);
@@ -1073,3 +1131,93 @@ int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen)
        return totlen;
 }
 EXPORT_SYMBOL_GPL(svc_xprt_names);
+
+
+/*----------------------------------------------------------------------------*/
+
+static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos)
+{
+       unsigned int pidx = (unsigned int)*pos;
+       struct svc_serv *serv = m->private;
+
+       dprintk("svc_pool_stats_start, *pidx=%u\n", pidx);
+
+       lock_kernel();
+       /* bump up the pseudo refcount while traversing */
+       svc_get(serv);
+       unlock_kernel();
+       /* *pos 0 is the header token; *pos N (N >= 1) selects sv_pools[N-1] */
+       if (!pidx)
+               return SEQ_START_TOKEN;
+       return (pidx > serv->sv_nrpools ? NULL : &serv->sv_pools[pidx-1]);
+}
+
+static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos)
+{
+       struct svc_pool *pool = p;
+       struct svc_serv *serv = m->private;
+
+       dprintk("svc_pool_stats_next, *pos=%llu\n", *pos);
+       /* header token -> pool 0; after the last pool return NULL (end) */
+       if (p == SEQ_START_TOKEN) {
+               pool = &serv->sv_pools[0];
+       } else {
+               unsigned int pidx = (pool - &serv->sv_pools[0]);
+               if (pidx < serv->sv_nrpools-1)
+                       pool = &serv->sv_pools[pidx+1];
+               else
+                       pool = NULL;
+       }
+       ++*pos;
+       return pool;
+}
+
+static void svc_pool_stats_stop(struct seq_file *m, void *p)
+{
+       struct svc_serv *serv = m->private;
+
+       lock_kernel();
+       /* balances the svc_get() in ->start; svc_destroy() acts as a put */
+       svc_destroy(serv);
+       unlock_kernel();
+}
+
+static int svc_pool_stats_show(struct seq_file *m, void *p)
+{
+       struct svc_pool *pool = p;
+
+       if (p == SEQ_START_TOKEN) {
+               seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken overloads-avoided threads-timedout\n");
+               return 0;
+       }
+       /* one row per pool, columns in the same order as the header line */
+       seq_printf(m, "%u %lu %lu %lu %lu %lu\n",
+               pool->sp_id,
+               pool->sp_stats.packets,
+               pool->sp_stats.sockets_queued,
+               pool->sp_stats.threads_woken,
+               pool->sp_stats.overloads_avoided,
+               pool->sp_stats.threads_timedout);
+
+       return 0;
+}
+
+static const struct seq_operations svc_pool_stats_seq_ops = {
+       .start  = svc_pool_stats_start, /* takes a svc_serv reference */
+       .next   = svc_pool_stats_next,  /* header token, then each pool */
+       .stop   = svc_pool_stats_stop,  /* drops the svc_serv reference */
+       .show   = svc_pool_stats_show,  /* prints the header or one pool */
+};
+
+int svc_pool_stats_open(struct svc_serv *serv, struct file *file)
+{
+       int err;
+
+       err = seq_open(file, &svc_pool_stats_seq_ops);
+       if (!err)       /* on success, expose serv to the iterators as m->private */
+               ((struct seq_file *) file->private_data)->private = serv;
+       return err;
+}
+EXPORT_SYMBOL(svc_pool_stats_open);
+
+/*----------------------------------------------------------------------------*/