SAFE public projects git trees. - safe/jmp/linux-2.6/blob - net/sunrpc/svc_xprt.c

   1 /*
   2  * linux/net/sunrpc/svc_xprt.c
   3  *
   4  * Author: Tom Tucker <tom@opengridcomputing.com>
   5  */
   6
   7 #include <linux/sched.h>
   8 #include <linux/errno.h>
   9 #include <linux/freezer.h>
  10 #include <linux/kthread.h>
  11 #include <net/sock.h>
  12 #include <linux/sunrpc/stats.h>
  13 #include <linux/sunrpc/svc_xprt.h>
  14
#define RPCDBG_FACILITY RPCDBG_SVCXPRT

/*
 * Cap on pool->sp_nwaking.  svc_xprt_enqueue() bumps sp_nwaking each time
 * it wakes an idle thread and svc_recv() drops it again once that thread
 * runs; while the count is at or above this value, further transports are
 * queued on the pool instead of waking yet another thread.
 */
#define SVC_MAX_WAKING 5

/* Forward declarations of static functions used before their definition. */
static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
static int svc_deferred_recv(struct svc_rqst *rqstp);
static struct cache_deferred_req *svc_defer(struct cache_req *req);
static void svc_age_temp_xprts(unsigned long closure);

/* apparently the "standard" is that clients close
 * idle connections after 5 minutes, servers after
 * 6 minutes
 *   http://www.connectathon.org/talks96/nfstcp.pdf
 *
 * The value is in seconds: it is multiplied by HZ when the
 * sv_temptimer ageing timer is armed in svc_recv().
 */
static int svc_conn_age_period = 6*60;

/* List of registered transport classes, guarded by svc_xprt_class_lock */
static DEFINE_SPINLOCK(svc_xprt_class_lock);
static LIST_HEAD(svc_xprt_class_list);
  34
  35 /* SMP locking strategy:
  36  *
  37  *      svc_pool->sp_lock protects most of the fields of that pool.
  38  *      svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt.
  39  *      when both need to be taken (rare), svc_serv->sv_lock is first.
  40  *      BKL protects svc_serv->sv_nrthreads.
  41  *      svc_sock->sk_lock protects the svc_sock->sk_deferred list
  42  *             and the ->sk_info_authunix cache.
  43  *
  44  *      The XPT_BUSY bit in xprt->xpt_flags prevents a transport being
  45  *      enqueued multiply. During normal transport processing this bit
  46  *      is set by svc_xprt_enqueue and cleared by svc_xprt_received.
  47  *      Providers should not manipulate this bit directly.
  48  *
  49  *      Some flags can be set to certain values at any time
  50  *      providing that certain rules are followed:
  51  *
  52  *      XPT_CONN, XPT_DATA:
  53  *              - Can be set or cleared at any time.
  54  *              - After a set, svc_xprt_enqueue must be called to enqueue
  55  *                the transport for processing.
  56  *              - After a clear, the transport must be read/accepted.
  57  *                If this succeeds, it must be set again.
  58  *      XPT_CLOSE:
  59  *              - Can be set at any time. It is never cleared.
  60  *      XPT_DEAD:
  61  *              - Can only be set while XPT_BUSY is held which ensures
  62  *                that no other thread will be using the transport or will
  63  *                try to set XPT_DEAD.
  64  */
  65
  66 int svc_reg_xprt_class(struct svc_xprt_class *xcl)
  67 {
  68         struct svc_xprt_class *cl;
  69         int res = -EEXIST;
  70
  71         dprintk("svc: Adding svc transport class '%s'\n", xcl->xcl_name);
  72
  73         INIT_LIST_HEAD(&xcl->xcl_list);
  74         spin_lock(&svc_xprt_class_lock);
  75         /* Make sure there isn't already a class with the same name */
  76         list_for_each_entry(cl, &svc_xprt_class_list, xcl_list) {
  77                 if (strcmp(xcl->xcl_name, cl->xcl_name) == 0)
  78                         goto out;
  79         }
  80         list_add_tail(&xcl->xcl_list, &svc_xprt_class_list);
  81         res = 0;
  82 out:
  83         spin_unlock(&svc_xprt_class_lock);
  84         return res;
  85 }
  86 EXPORT_SYMBOL_GPL(svc_reg_xprt_class);
  87
  88 void svc_unreg_xprt_class(struct svc_xprt_class *xcl)
  89 {
  90         dprintk("svc: Removing svc transport class '%s'\n", xcl->xcl_name);
  91         spin_lock(&svc_xprt_class_lock);
  92         list_del_init(&xcl->xcl_list);
  93         spin_unlock(&svc_xprt_class_lock);
  94 }
  95 EXPORT_SYMBOL_GPL(svc_unreg_xprt_class);
  96
  97 /*
  98  * Format the transport list for printing
  99  */
 100 int svc_print_xprts(char *buf, int maxlen)
 101 {
 102         struct list_head *le;
 103         char tmpstr[80];
 104         int len = 0;
 105         buf[0] = '\0';
 106
 107         spin_lock(&svc_xprt_class_lock);
 108         list_for_each(le, &svc_xprt_class_list) {
 109                 int slen;
 110                 struct svc_xprt_class *xcl =
 111                         list_entry(le, struct svc_xprt_class, xcl_list);
 112
 113                 sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload);
 114                 slen = strlen(tmpstr);
 115                 if (len + slen > maxlen)
 116                         break;
 117                 len += slen;
 118                 strcat(buf, tmpstr);
 119         }
 120         spin_unlock(&svc_xprt_class_lock);
 121
 122         return len;
 123 }
 124
 125 static void svc_xprt_free(struct kref *kref)
 126 {
 127         struct svc_xprt *xprt =
 128                 container_of(kref, struct svc_xprt, xpt_ref);
 129         struct module *owner = xprt->xpt_class->xcl_owner;
 130         if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)
 131             && xprt->xpt_auth_cache != NULL)
 132                 svcauth_unix_info_release(xprt->xpt_auth_cache);
 133         xprt->xpt_ops->xpo_free(xprt);
 134         module_put(owner);
 135 }
 136
 137 void svc_xprt_put(struct svc_xprt *xprt)
 138 {
 139         kref_put(&xprt->xpt_ref, svc_xprt_free);
 140 }
 141 EXPORT_SYMBOL_GPL(svc_xprt_put);
 142
 143 /*
 144  * Called by transport drivers to initialize the transport independent
 145  * portion of the transport instance.
 146  */
 147 void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
 148                    struct svc_serv *serv)
 149 {
 150         memset(xprt, 0, sizeof(*xprt));
 151         xprt->xpt_class = xcl;
 152         xprt->xpt_ops = xcl->xcl_ops;
 153         kref_init(&xprt->xpt_ref);
 154         xprt->xpt_server = serv;
 155         INIT_LIST_HEAD(&xprt->xpt_list);
 156         INIT_LIST_HEAD(&xprt->xpt_ready);
 157         INIT_LIST_HEAD(&xprt->xpt_deferred);
 158         mutex_init(&xprt->xpt_mutex);
 159         spin_lock_init(&xprt->xpt_lock);
 160         set_bit(XPT_BUSY, &xprt->xpt_flags);
 161 }
 162 EXPORT_SYMBOL_GPL(svc_xprt_init);
 163
 164 static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
 165                                          struct svc_serv *serv,
 166                                          unsigned short port, int flags)
 167 {
 168         struct sockaddr_in sin = {
 169                 .sin_family             = AF_INET,
 170                 .sin_addr.s_addr        = htonl(INADDR_ANY),
 171                 .sin_port               = htons(port),
 172         };
 173         struct sockaddr_in6 sin6 = {
 174                 .sin6_family            = AF_INET6,
 175                 .sin6_addr              = IN6ADDR_ANY_INIT,
 176                 .sin6_port              = htons(port),
 177         };
 178         struct sockaddr *sap;
 179         size_t len;
 180
 181         switch (serv->sv_family) {
 182         case AF_INET:
 183                 sap = (struct sockaddr *)&sin;
 184                 len = sizeof(sin);
 185                 break;
 186         case AF_INET6:
 187                 sap = (struct sockaddr *)&sin6;
 188                 len = sizeof(sin6);
 189                 break;
 190         default:
 191                 return ERR_PTR(-EAFNOSUPPORT);
 192         }
 193
 194         return xcl->xcl_ops->xpo_create(serv, sap, len, flags);
 195 }
 196
 197 int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
 198                     int flags)
 199 {
 200         struct svc_xprt_class *xcl;
 201
 202         dprintk("svc: creating transport %s[%d]\n", xprt_name, port);
 203         spin_lock(&svc_xprt_class_lock);
 204         list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
 205                 struct svc_xprt *newxprt;
 206
 207                 if (strcmp(xprt_name, xcl->xcl_name))
 208                         continue;
 209
 210                 if (!try_module_get(xcl->xcl_owner))
 211                         goto err;
 212
 213                 spin_unlock(&svc_xprt_class_lock);
 214                 newxprt = __svc_xpo_create(xcl, serv, port, flags);
 215                 if (IS_ERR(newxprt)) {
 216                         module_put(xcl->xcl_owner);
 217                         return PTR_ERR(newxprt);
 218                 }
 219
 220                 clear_bit(XPT_TEMP, &newxprt->xpt_flags);
 221                 spin_lock_bh(&serv->sv_lock);
 222                 list_add(&newxprt->xpt_list, &serv->sv_permsocks);
 223                 spin_unlock_bh(&serv->sv_lock);
 224                 clear_bit(XPT_BUSY, &newxprt->xpt_flags);
 225                 return svc_xprt_local_port(newxprt);
 226         }
 227  err:
 228         spin_unlock(&svc_xprt_class_lock);
 229         dprintk("svc: transport %s not found\n", xprt_name);
 230         return -ENOENT;
 231 }
 232 EXPORT_SYMBOL_GPL(svc_create_xprt);
 233
 234 /*
 235  * Copy the local and remote xprt addresses to the rqstp structure
 236  */
 237 void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt)
 238 {
 239         struct sockaddr *sin;
 240
 241         memcpy(&rqstp->rq_addr, &xprt->xpt_remote, xprt->xpt_remotelen);
 242         rqstp->rq_addrlen = xprt->xpt_remotelen;
 243
 244         /*
 245          * Destination address in request is needed for binding the
 246          * source address in RPC replies/callbacks later.
 247          */
 248         sin = (struct sockaddr *)&xprt->xpt_local;
 249         switch (sin->sa_family) {
 250         case AF_INET:
 251                 rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr;
 252                 break;
 253         case AF_INET6:
 254                 rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr;
 255                 break;
 256         }
 257 }
 258 EXPORT_SYMBOL_GPL(svc_xprt_copy_addrs);
 259
 260 /**
 261  * svc_print_addr - Format rq_addr field for printing
 262  * @rqstp: svc_rqst struct containing address to print
 263  * @buf: target buffer for formatted address
 264  * @len: length of target buffer
 265  *
 266  */
 267 char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len)
 268 {
 269         return __svc_print_addr(svc_addr(rqstp), buf, len);
 270 }
 271 EXPORT_SYMBOL_GPL(svc_print_addr);
 272
 273 /*
 274  * Queue up an idle server thread.  Must have pool->sp_lock held.
 275  * Note: this is really a stack rather than a queue, so that we only
 276  * use as many different threads as we need, and the rest don't pollute
 277  * the cache.
 278  */
 279 static void svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp)
 280 {
 281         list_add(&rqstp->rq_list, &pool->sp_threads);
 282 }
 283
 284 /*
 285  * Dequeue an nfsd thread.  Must have pool->sp_lock held.
 286  */
 287 static void svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)
 288 {
 289         list_del(&rqstp->rq_list);
 290 }
 291
 292 /*
 293  * Queue up a transport with data pending. If there are idle nfsd
 294  * processes, wake 'em up.
 295  *
 296  */
 297 void svc_xprt_enqueue(struct svc_xprt *xprt)
 298 {
 299         struct svc_serv *serv = xprt->xpt_server;
 300         struct svc_pool *pool;
 301         struct svc_rqst *rqstp;
 302         int cpu;
 303         int thread_avail;
 304
 305         if (!(xprt->xpt_flags &
 306               ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
 307                 return;
 308
 309         cpu = get_cpu();
 310         pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
 311         put_cpu();
 312
 313         spin_lock_bh(&pool->sp_lock);
 314
 315         if (test_bit(XPT_DEAD, &xprt->xpt_flags)) {
 316                 /* Don't enqueue dead transports */
 317                 dprintk("svc: transport %p is dead, not enqueued\n", xprt);
 318                 goto out_unlock;
 319         }
 320
 321         /* Mark transport as busy. It will remain in this state until
 322          * the provider calls svc_xprt_received. We update XPT_BUSY
 323          * atomically because it also guards against trying to enqueue
 324          * the transport twice.
 325          */
 326         if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) {
 327                 /* Don't enqueue transport while already enqueued */
 328                 dprintk("svc: transport %p busy, not enqueued\n", xprt);
 329                 goto out_unlock;
 330         }
 331         BUG_ON(xprt->xpt_pool != NULL);
 332         xprt->xpt_pool = pool;
 333
 334         /* Handle pending connection */
 335         if (test_bit(XPT_CONN, &xprt->xpt_flags))
 336                 goto process;
 337
 338         /* Handle close in-progress */
 339         if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
 340                 goto process;
 341
 342         /* Check if we have space to reply to a request */
 343         if (!xprt->xpt_ops->xpo_has_wspace(xprt)) {
 344                 /* Don't enqueue while not enough space for reply */
 345                 dprintk("svc: no write space, transport %p  not enqueued\n",
 346                         xprt);
 347                 xprt->xpt_pool = NULL;
 348                 clear_bit(XPT_BUSY, &xprt->xpt_flags);
 349                 goto out_unlock;
 350         }
 351
 352  process:
 353         /* Work out whether threads are available */
 354         thread_avail = !list_empty(&pool->sp_threads);  /* threads are asleep */
 355         if (pool->sp_nwaking >= SVC_MAX_WAKING) {
 356                 /* too many threads are runnable and trying to wake up */
 357                 thread_avail = 0;
 358         }
 359
 360         if (thread_avail) {
 361                 rqstp = list_entry(pool->sp_threads.next,
 362                                    struct svc_rqst,
 363                                    rq_list);
 364                 dprintk("svc: transport %p served by daemon %p\n",
 365                         xprt, rqstp);
 366                 svc_thread_dequeue(pool, rqstp);
 367                 if (rqstp->rq_xprt)
 368                         printk(KERN_ERR
 369                                 "svc_xprt_enqueue: server %p, rq_xprt=%p!\n",
 370                                 rqstp, rqstp->rq_xprt);
 371                 rqstp->rq_xprt = xprt;
 372                 svc_xprt_get(xprt);
 373                 rqstp->rq_reserved = serv->sv_max_mesg;
 374                 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
 375                 rqstp->rq_waking = 1;
 376                 pool->sp_nwaking++;
 377                 BUG_ON(xprt->xpt_pool != pool);
 378                 wake_up(&rqstp->rq_wait);
 379         } else {
 380                 dprintk("svc: transport %p put into queue\n", xprt);
 381                 list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
 382                 BUG_ON(xprt->xpt_pool != pool);
 383         }
 384
 385 out_unlock:
 386         spin_unlock_bh(&pool->sp_lock);
 387 }
 388 EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
 389
 390 /*
 391  * Dequeue the first transport.  Must be called with the pool->sp_lock held.
 392  */
 393 static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool)
 394 {
 395         struct svc_xprt *xprt;
 396
 397         if (list_empty(&pool->sp_sockets))
 398                 return NULL;
 399
 400         xprt = list_entry(pool->sp_sockets.next,
 401                           struct svc_xprt, xpt_ready);
 402         list_del_init(&xprt->xpt_ready);
 403
 404         dprintk("svc: transport %p dequeued, inuse=%d\n",
 405                 xprt, atomic_read(&xprt->xpt_ref.refcount));
 406
 407         return xprt;
 408 }
 409
 410 /*
 411  * svc_xprt_received conditionally queues the transport for processing
 412  * by another thread. The caller must hold the XPT_BUSY bit and must
 413  * not thereafter touch transport data.
 414  *
 415  * Note: XPT_DATA only gets cleared when a read-attempt finds no (or
 416  * insufficient) data.
 417  */
 418 void svc_xprt_received(struct svc_xprt *xprt)
 419 {
 420         BUG_ON(!test_bit(XPT_BUSY, &xprt->xpt_flags));
 421         xprt->xpt_pool = NULL;
 422         clear_bit(XPT_BUSY, &xprt->xpt_flags);
 423         svc_xprt_enqueue(xprt);
 424 }
 425 EXPORT_SYMBOL_GPL(svc_xprt_received);
 426
 427 /**
 428  * svc_reserve - change the space reserved for the reply to a request.
 429  * @rqstp:  The request in question
 430  * @space: new max space to reserve
 431  *
 432  * Each request reserves some space on the output queue of the transport
 433  * to make sure the reply fits.  This function reduces that reserved
 434  * space to be the amount of space used already, plus @space.
 435  *
 436  */
 437 void svc_reserve(struct svc_rqst *rqstp, int space)
 438 {
 439         space += rqstp->rq_res.head[0].iov_len;
 440
 441         if (space < rqstp->rq_reserved) {
 442                 struct svc_xprt *xprt = rqstp->rq_xprt;
 443                 atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
 444                 rqstp->rq_reserved = space;
 445
 446                 svc_xprt_enqueue(xprt);
 447         }
 448 }
 449 EXPORT_SYMBOL_GPL(svc_reserve);
 450
 451 static void svc_xprt_release(struct svc_rqst *rqstp)
 452 {
 453         struct svc_xprt *xprt = rqstp->rq_xprt;
 454
 455         rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);
 456
 457         kfree(rqstp->rq_deferred);
 458         rqstp->rq_deferred = NULL;
 459
 460         svc_free_res_pages(rqstp);
 461         rqstp->rq_res.page_len = 0;
 462         rqstp->rq_res.page_base = 0;
 463
 464         /* Reset response buffer and release
 465          * the reservation.
 466          * But first, check that enough space was reserved
 467          * for the reply, otherwise we have a bug!
 468          */
 469         if ((rqstp->rq_res.len) >  rqstp->rq_reserved)
 470                 printk(KERN_ERR "RPC request reserved %d but used %d\n",
 471                        rqstp->rq_reserved,
 472                        rqstp->rq_res.len);
 473
 474         rqstp->rq_res.head[0].iov_len = 0;
 475         svc_reserve(rqstp, 0);
 476         rqstp->rq_xprt = NULL;
 477
 478         svc_xprt_put(xprt);
 479 }
 480
 481 /*
 482  * External function to wake up a server waiting for data
 483  * This really only makes sense for services like lockd
 484  * which have exactly one thread anyway.
 485  */
 486 void svc_wake_up(struct svc_serv *serv)
 487 {
 488         struct svc_rqst *rqstp;
 489         unsigned int i;
 490         struct svc_pool *pool;
 491
 492         for (i = 0; i < serv->sv_nrpools; i++) {
 493                 pool = &serv->sv_pools[i];
 494
 495                 spin_lock_bh(&pool->sp_lock);
 496                 if (!list_empty(&pool->sp_threads)) {
 497                         rqstp = list_entry(pool->sp_threads.next,
 498                                            struct svc_rqst,
 499                                            rq_list);
 500                         dprintk("svc: daemon %p woken up.\n", rqstp);
 501                         /*
 502                         svc_thread_dequeue(pool, rqstp);
 503                         rqstp->rq_xprt = NULL;
 504                          */
 505                         wake_up(&rqstp->rq_wait);
 506                 }
 507                 spin_unlock_bh(&pool->sp_lock);
 508         }
 509 }
 510 EXPORT_SYMBOL_GPL(svc_wake_up);
 511
 512 int svc_port_is_privileged(struct sockaddr *sin)
 513 {
 514         switch (sin->sa_family) {
 515         case AF_INET:
 516                 return ntohs(((struct sockaddr_in *)sin)->sin_port)
 517                         < PROT_SOCK;
 518         case AF_INET6:
 519                 return ntohs(((struct sockaddr_in6 *)sin)->sin6_port)
 520                         < PROT_SOCK;
 521         default:
 522                 return 0;
 523         }
 524 }
 525
 526 /*
 527  * Make sure that we don't have too many active connections. If we have,
 528  * something must be dropped. It's not clear what will happen if we allow
 529  * "too many" connections, but when dealing with network-facing software,
 530  * we have to code defensively. Here we do that by imposing hard limits.
 531  *
 532  * There's no point in trying to do random drop here for DoS
 533  * prevention. The NFS clients does 1 reconnect in 15 seconds. An
 534  * attacker can easily beat that.
 535  *
 536  * The only somewhat efficient mechanism would be if drop old
 537  * connections from the same IP first. But right now we don't even
 538  * record the client IP in svc_sock.
 539  *
 540  * single-threaded services that expect a lot of clients will probably
 541  * need to set sv_maxconn to override the default value which is based
 542  * on the number of threads
 543  */
 544 static void svc_check_conn_limits(struct svc_serv *serv)
 545 {
 546         unsigned int limit = serv->sv_maxconn ? serv->sv_maxconn :
 547                                 (serv->sv_nrthreads+3) * 20;
 548
 549         if (serv->sv_tmpcnt > limit) {
 550                 struct svc_xprt *xprt = NULL;
 551                 spin_lock_bh(&serv->sv_lock);
 552                 if (!list_empty(&serv->sv_tempsocks)) {
 553                         if (net_ratelimit()) {
 554                                 /* Try to help the admin */
 555                                 printk(KERN_NOTICE "%s: too many open  "
 556                                        "connections, consider increasing %s\n",
 557                                        serv->sv_name, serv->sv_maxconn ?
 558                                        "the max number of connections." :
 559                                        "the number of threads.");
 560                         }
 561                         /*
 562                          * Always select the oldest connection. It's not fair,
 563                          * but so is life
 564                          */
 565                         xprt = list_entry(serv->sv_tempsocks.prev,
 566                                           struct svc_xprt,
 567                                           xpt_list);
 568                         set_bit(XPT_CLOSE, &xprt->xpt_flags);
 569                         svc_xprt_get(xprt);
 570                 }
 571                 spin_unlock_bh(&serv->sv_lock);
 572
 573                 if (xprt) {
 574                         svc_xprt_enqueue(xprt);
 575                         svc_xprt_put(xprt);
 576                 }
 577         }
 578 }
 579
 580 /*
 581  * Receive the next request on any transport.  This code is carefully
 582  * organised not to touch any cachelines in the shared svc_serv
 583  * structure, only cachelines in the local svc_pool.
 584  */
 585 int svc_recv(struct svc_rqst *rqstp, long timeout)
 586 {
 587         struct svc_xprt         *xprt = NULL;
 588         struct svc_serv         *serv = rqstp->rq_server;
 589         struct svc_pool         *pool = rqstp->rq_pool;
 590         int                     len, i;
 591         int                     pages;
 592         struct xdr_buf          *arg;
 593         DECLARE_WAITQUEUE(wait, current);
 594
 595         dprintk("svc: server %p waiting for data (to = %ld)\n",
 596                 rqstp, timeout);
 597
 598         if (rqstp->rq_xprt)
 599                 printk(KERN_ERR
 600                         "svc_recv: service %p, transport not NULL!\n",
 601                          rqstp);
 602         if (waitqueue_active(&rqstp->rq_wait))
 603                 printk(KERN_ERR
 604                         "svc_recv: service %p, wait queue active!\n",
 605                          rqstp);
 606
 607         /* now allocate needed pages.  If we get a failure, sleep briefly */
 608         pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE;
 609         for (i = 0; i < pages ; i++)
 610                 while (rqstp->rq_pages[i] == NULL) {
 611                         struct page *p = alloc_page(GFP_KERNEL);
 612                         if (!p) {
 613                                 set_current_state(TASK_INTERRUPTIBLE);
 614                                 if (signalled() || kthread_should_stop()) {
 615                                         set_current_state(TASK_RUNNING);
 616                                         return -EINTR;
 617                                 }
 618                                 schedule_timeout(msecs_to_jiffies(500));
 619                         }
 620                         rqstp->rq_pages[i] = p;
 621                 }
 622         rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
 623         BUG_ON(pages >= RPCSVC_MAXPAGES);
 624
 625         /* Make arg->head point to first page and arg->pages point to rest */
 626         arg = &rqstp->rq_arg;
 627         arg->head[0].iov_base = page_address(rqstp->rq_pages[0]);
 628         arg->head[0].iov_len = PAGE_SIZE;
 629         arg->pages = rqstp->rq_pages + 1;
 630         arg->page_base = 0;
 631         /* save at least one page for response */
 632         arg->page_len = (pages-2)*PAGE_SIZE;
 633         arg->len = (pages-1)*PAGE_SIZE;
 634         arg->tail[0].iov_len = 0;
 635
 636         try_to_freeze();
 637         cond_resched();
 638         if (signalled() || kthread_should_stop())
 639                 return -EINTR;
 640
 641         spin_lock_bh(&pool->sp_lock);
 642         if (rqstp->rq_waking) {
 643                 rqstp->rq_waking = 0;
 644                 pool->sp_nwaking--;
 645                 BUG_ON(pool->sp_nwaking < 0);
 646         }
 647         xprt = svc_xprt_dequeue(pool);
 648         if (xprt) {
 649                 rqstp->rq_xprt = xprt;
 650                 svc_xprt_get(xprt);
 651                 rqstp->rq_reserved = serv->sv_max_mesg;
 652                 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
 653         } else {
 654                 /* No data pending. Go to sleep */
 655                 svc_thread_enqueue(pool, rqstp);
 656
 657                 /*
 658                  * We have to be able to interrupt this wait
 659                  * to bring down the daemons ...
 660                  */
 661                 set_current_state(TASK_INTERRUPTIBLE);
 662
 663                 /*
 664                  * checking kthread_should_stop() here allows us to avoid
 665                  * locking and signalling when stopping kthreads that call
 666                  * svc_recv. If the thread has already been woken up, then
 667                  * we can exit here without sleeping. If not, then it
 668                  * it'll be woken up quickly during the schedule_timeout
 669                  */
 670                 if (kthread_should_stop()) {
 671                         set_current_state(TASK_RUNNING);
 672                         spin_unlock_bh(&pool->sp_lock);
 673                         return -EINTR;
 674                 }
 675
 676                 add_wait_queue(&rqstp->rq_wait, &wait);
 677                 spin_unlock_bh(&pool->sp_lock);
 678
 679                 schedule_timeout(timeout);
 680
 681                 try_to_freeze();
 682
 683                 spin_lock_bh(&pool->sp_lock);
 684                 remove_wait_queue(&rqstp->rq_wait, &wait);
 685
 686                 xprt = rqstp->rq_xprt;
 687                 if (!xprt) {
 688                         svc_thread_dequeue(pool, rqstp);
 689                         spin_unlock_bh(&pool->sp_lock);
 690                         dprintk("svc: server %p, no data yet\n", rqstp);
 691                         if (signalled() || kthread_should_stop())
 692                                 return -EINTR;
 693                         else
 694                                 return -EAGAIN;
 695                 }
 696         }
 697         spin_unlock_bh(&pool->sp_lock);
 698
 699         len = 0;
 700         if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
 701                 dprintk("svc_recv: found XPT_CLOSE\n");
 702                 svc_delete_xprt(xprt);
 703         } else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
 704                 struct svc_xprt *newxpt;
 705                 newxpt = xprt->xpt_ops->xpo_accept(xprt);
 706                 if (newxpt) {
 707                         /*
 708                          * We know this module_get will succeed because the
 709                          * listener holds a reference too
 710                          */
 711                         __module_get(newxpt->xpt_class->xcl_owner);
 712                         svc_check_conn_limits(xprt->xpt_server);
 713                         spin_lock_bh(&serv->sv_lock);
 714                         set_bit(XPT_TEMP, &newxpt->xpt_flags);
 715                         list_add(&newxpt->xpt_list, &serv->sv_tempsocks);
 716                         serv->sv_tmpcnt++;
 717                         if (serv->sv_temptimer.function == NULL) {
 718                                 /* setup timer to age temp transports */
 719                                 setup_timer(&serv->sv_temptimer,
 720                                             svc_age_temp_xprts,
 721                                             (unsigned long)serv);
 722                                 mod_timer(&serv->sv_temptimer,
 723                                           jiffies + svc_conn_age_period * HZ);
 724                         }
 725                         spin_unlock_bh(&serv->sv_lock);
 726                         svc_xprt_received(newxpt);
 727                 }
 728                 svc_xprt_received(xprt);
 729         } else {
 730                 dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
 731                         rqstp, pool->sp_id, xprt,
 732                         atomic_read(&xprt->xpt_ref.refcount));
 733                 rqstp->rq_deferred = svc_deferred_dequeue(xprt);
 734                 if (rqstp->rq_deferred) {
 735                         svc_xprt_received(xprt);
 736                         len = svc_deferred_recv(rqstp);
 737                 } else
 738                         len = xprt->xpt_ops->xpo_recvfrom(rqstp);
 739                 dprintk("svc: got len=%d\n", len);
 740         }
 741
 742         /* No data, incomplete (TCP) read, or accept() */
 743         if (len == 0 || len == -EAGAIN) {
 744                 rqstp->rq_res.len = 0;
 745                 svc_xprt_release(rqstp);
 746                 return -EAGAIN;
 747         }
 748         clear_bit(XPT_OLD, &xprt->xpt_flags);
 749
 750         rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
 751         rqstp->rq_chandle.defer = svc_defer;
 752
 753         if (serv->sv_stats)
 754                 serv->sv_stats->netcnt++;
 755         return len;
 756 }
 757 EXPORT_SYMBOL_GPL(svc_recv);
 758
 759 /*
 760  * Drop request
 761  */
 762 void svc_drop(struct svc_rqst *rqstp)
 763 {
 764         dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt);
 765         svc_xprt_release(rqstp);
 766 }
 767 EXPORT_SYMBOL_GPL(svc_drop);
 768
 769 /*
 770  * Return reply to client.
 771  */
 772 int svc_send(struct svc_rqst *rqstp)
 773 {
 774         struct svc_xprt *xprt;
 775         int             len;
 776         struct xdr_buf  *xb;
 777
 778         xprt = rqstp->rq_xprt;
 779         if (!xprt)
 780                 return -EFAULT;
 781
 782         /* release the receive skb before sending the reply */
 783         rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);
 784
 785         /* calculate over-all length */
 786         xb = &rqstp->rq_res;
 787         xb->len = xb->head[0].iov_len +
 788                 xb->page_len +
 789                 xb->tail[0].iov_len;
 790
 791         /* Grab mutex to serialize outgoing data. */
 792         mutex_lock(&xprt->xpt_mutex);
 793         if (test_bit(XPT_DEAD, &xprt->xpt_flags))
 794                 len = -ENOTCONN;
 795         else
 796                 len = xprt->xpt_ops->xpo_sendto(rqstp);
 797         mutex_unlock(&xprt->xpt_mutex);
 798         svc_xprt_release(rqstp);
 799
 800         if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
 801                 return 0;
 802         return len;
 803 }
 804
 805 /*
 806  * Timer function to close old temporary transports, using
 807  * a mark-and-sweep algorithm.
 808  */
 809 static void svc_age_temp_xprts(unsigned long closure)
 810 {
 811         struct svc_serv *serv = (struct svc_serv *)closure;
 812         struct svc_xprt *xprt;
 813         struct list_head *le, *next;
 814         LIST_HEAD(to_be_aged);
 815
 816         dprintk("svc_age_temp_xprts\n");
 817
 818         if (!spin_trylock_bh(&serv->sv_lock)) {
 819                 /* busy, try again 1 sec later */
 820                 dprintk("svc_age_temp_xprts: busy\n");
 821                 mod_timer(&serv->sv_temptimer, jiffies + HZ);
 822                 return;
 823         }
 824
 825         list_for_each_safe(le, next, &serv->sv_tempsocks) {
 826                 xprt = list_entry(le, struct svc_xprt, xpt_list);
 827
 828                 /* First time through, just mark it OLD. Second time
 829                  * through, close it. */
 830                 if (!test_and_set_bit(XPT_OLD, &xprt->xpt_flags))
 831                         continue;
 832                 if (atomic_read(&xprt->xpt_ref.refcount) > 1
 833                     || test_bit(XPT_BUSY, &xprt->xpt_flags))
 834                         continue;
 835                 svc_xprt_get(xprt);
 836                 list_move(le, &to_be_aged);
 837                 set_bit(XPT_CLOSE, &xprt->xpt_flags);
 838                 set_bit(XPT_DETACHED, &xprt->xpt_flags);
 839         }
 840         spin_unlock_bh(&serv->sv_lock);
 841
 842         while (!list_empty(&to_be_aged)) {
 843                 le = to_be_aged.next;
 844                 /* fiddling the xpt_list node is safe 'cos we're XPT_DETACHED */
 845                 list_del_init(le);
 846                 xprt = list_entry(le, struct svc_xprt, xpt_list);
 847
 848                 dprintk("queuing xprt %p for closing\n", xprt);
 849
 850                 /* a thread will dequeue and close it soon */
 851                 svc_xprt_enqueue(xprt);
 852                 svc_xprt_put(xprt);
 853         }
 854
 855         mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
 856 }
 857
 858 /*
 859  * Remove a dead transport
 860  */
 861 void svc_delete_xprt(struct svc_xprt *xprt)
 862 {
 863         struct svc_serv *serv = xprt->xpt_server;
 864         struct svc_deferred_req *dr;
 865
 866         /* Only do this once */
 867         if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags))
 868                 return;
 869
 870         dprintk("svc: svc_delete_xprt(%p)\n", xprt);
 871         xprt->xpt_ops->xpo_detach(xprt);
 872
 873         spin_lock_bh(&serv->sv_lock);
 874         if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags))
 875                 list_del_init(&xprt->xpt_list);
 876         /*
 877          * We used to delete the transport from whichever list
 878          * it's sk_xprt.xpt_ready node was on, but we don't actually
 879          * need to.  This is because the only time we're called
 880          * while still attached to a queue, the queue itself
 881          * is about to be destroyed (in svc_destroy).
 882          */
 883         if (test_bit(XPT_TEMP, &xprt->xpt_flags))
 884                 serv->sv_tmpcnt--;
 885
 886         for (dr = svc_deferred_dequeue(xprt); dr;
 887              dr = svc_deferred_dequeue(xprt)) {
 888                 svc_xprt_put(xprt);
 889                 kfree(dr);
 890         }
 891
 892         svc_xprt_put(xprt);
 893         spin_unlock_bh(&serv->sv_lock);
 894 }
 895
 896 void svc_close_xprt(struct svc_xprt *xprt)
 897 {
 898         set_bit(XPT_CLOSE, &xprt->xpt_flags);
 899         if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
 900                 /* someone else will have to effect the close */
 901                 return;
 902
 903         svc_xprt_get(xprt);
 904         svc_delete_xprt(xprt);
 905         clear_bit(XPT_BUSY, &xprt->xpt_flags);
 906         svc_xprt_put(xprt);
 907 }
 908 EXPORT_SYMBOL_GPL(svc_close_xprt);
 909
 910 void svc_close_all(struct list_head *xprt_list)
 911 {
 912         struct svc_xprt *xprt;
 913         struct svc_xprt *tmp;
 914
 915         list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) {
 916                 set_bit(XPT_CLOSE, &xprt->xpt_flags);
 917                 if (test_bit(XPT_BUSY, &xprt->xpt_flags)) {
 918                         /* Waiting to be processed, but no threads left,
 919                          * So just remove it from the waiting list
 920                          */
 921                         list_del_init(&xprt->xpt_ready);
 922                         clear_bit(XPT_BUSY, &xprt->xpt_flags);
 923                 }
 924                 svc_close_xprt(xprt);
 925         }
 926 }
 927
 928 /*
 929  * Handle defer and revisit of requests
 930  */
 931
 932 static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
 933 {
 934         struct svc_deferred_req *dr =
 935                 container_of(dreq, struct svc_deferred_req, handle);
 936         struct svc_xprt *xprt = dr->xprt;
 937
 938         spin_lock(&xprt->xpt_lock);
 939         set_bit(XPT_DEFERRED, &xprt->xpt_flags);
 940         if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
 941                 spin_unlock(&xprt->xpt_lock);
 942                 dprintk("revisit canceled\n");
 943                 svc_xprt_put(xprt);
 944                 kfree(dr);
 945                 return;
 946         }
 947         dprintk("revisit queued\n");
 948         dr->xprt = NULL;
 949         list_add(&dr->handle.recent, &xprt->xpt_deferred);
 950         spin_unlock(&xprt->xpt_lock);
 951         svc_xprt_enqueue(xprt);
 952         svc_xprt_put(xprt);
 953 }
 954
 955 /*
 956  * Save the request off for later processing. The request buffer looks
 957  * like this:
 958  *
 959  * <xprt-header><rpc-header><rpc-pagelist><rpc-tail>
 960  *
 961  * This code can only handle requests that consist of an xprt-header
 962  * and rpc-header.
 963  */
 964 static struct cache_deferred_req *svc_defer(struct cache_req *req)
 965 {
 966         struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
 967         struct svc_deferred_req *dr;
 968
 969         if (rqstp->rq_arg.page_len)
 970                 return NULL; /* if more than a page, give up FIXME */
 971         if (rqstp->rq_deferred) {
 972                 dr = rqstp->rq_deferred;
 973                 rqstp->rq_deferred = NULL;
 974         } else {
 975                 size_t skip;
 976                 size_t size;
 977                 /* FIXME maybe discard if size too large */
 978                 size = sizeof(struct svc_deferred_req) + rqstp->rq_arg.len;
 979                 dr = kmalloc(size, GFP_KERNEL);
 980                 if (dr == NULL)
 981                         return NULL;
 982
 983                 dr->handle.owner = rqstp->rq_server;
 984                 dr->prot = rqstp->rq_prot;
 985                 memcpy(&dr->addr, &rqstp->rq_addr, rqstp->rq_addrlen);
 986                 dr->addrlen = rqstp->rq_addrlen;
 987                 dr->daddr = rqstp->rq_daddr;
 988                 dr->argslen = rqstp->rq_arg.len >> 2;
 989                 dr->xprt_hlen = rqstp->rq_xprt_hlen;
 990
 991                 /* back up head to the start of the buffer and copy */
 992                 skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
 993                 memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip,
 994                        dr->argslen << 2);
 995         }
 996         svc_xprt_get(rqstp->rq_xprt);
 997         dr->xprt = rqstp->rq_xprt;
 998
 999         dr->handle.revisit = svc_revisit;
1000         return &dr->handle;
1001 }
1002
1003 /*
1004  * recv data from a deferred request into an active one
1005  */
1006 static int svc_deferred_recv(struct svc_rqst *rqstp)
1007 {
1008         struct svc_deferred_req *dr = rqstp->rq_deferred;
1009
1010         /* setup iov_base past transport header */
1011         rqstp->rq_arg.head[0].iov_base = dr->args + (dr->xprt_hlen>>2);
1012         /* The iov_len does not include the transport header bytes */
1013         rqstp->rq_arg.head[0].iov_len = (dr->argslen<<2) - dr->xprt_hlen;
1014         rqstp->rq_arg.page_len = 0;
1015         /* The rq_arg.len includes the transport header bytes */
1016         rqstp->rq_arg.len     = dr->argslen<<2;
1017         rqstp->rq_prot        = dr->prot;
1018         memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen);
1019         rqstp->rq_addrlen     = dr->addrlen;
1020         /* Save off transport header len in case we get deferred again */
1021         rqstp->rq_xprt_hlen   = dr->xprt_hlen;
1022         rqstp->rq_daddr       = dr->daddr;
1023         rqstp->rq_respages    = rqstp->rq_pages;
1024         return (dr->argslen<<2) - dr->xprt_hlen;
1025 }
1026
1027
1028 static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
1029 {
1030         struct svc_deferred_req *dr = NULL;
1031
1032         if (!test_bit(XPT_DEFERRED, &xprt->xpt_flags))
1033                 return NULL;
1034         spin_lock(&xprt->xpt_lock);
1035         clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
1036         if (!list_empty(&xprt->xpt_deferred)) {
1037                 dr = list_entry(xprt->xpt_deferred.next,
1038                                 struct svc_deferred_req,
1039                                 handle.recent);
1040                 list_del_init(&dr->handle.recent);
1041                 set_bit(XPT_DEFERRED, &xprt->xpt_flags);
1042         }
1043         spin_unlock(&xprt->xpt_lock);
1044         return dr;
1045 }
1046
1047 /*
1048  * Return the transport instance pointer for the endpoint accepting
1049  * connections/peer traffic from the specified transport class,
1050  * address family and port.
1051  *
1052  * Specifying 0 for the address family or port is effectively a
1053  * wild-card, and will result in matching the first transport in the
1054  * service's list that has a matching class name.
1055  */
1056 struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
1057                                int af, int port)
1058 {
1059         struct svc_xprt *xprt;
1060         struct svc_xprt *found = NULL;
1061
1062         /* Sanity check the args */
1063         if (!serv || !xcl_name)
1064                 return found;
1065
1066         spin_lock_bh(&serv->sv_lock);
1067         list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) {
1068                 if (strcmp(xprt->xpt_class->xcl_name, xcl_name))
1069                         continue;
1070                 if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family)
1071                         continue;
1072                 if (port && port != svc_xprt_local_port(xprt))
1073                         continue;
1074                 found = xprt;
1075                 svc_xprt_get(xprt);
1076                 break;
1077         }
1078         spin_unlock_bh(&serv->sv_lock);
1079         return found;
1080 }
1081 EXPORT_SYMBOL_GPL(svc_find_xprt);
1082
1083 /*
1084  * Format a buffer with a list of the active transports. A zero for
1085  * the buflen parameter disables target buffer overflow checking.
1086  */
1087 int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen)
1088 {
1089         struct svc_xprt *xprt;
1090         char xprt_str[64];
1091         int totlen = 0;
1092         int len;
1093
1094         /* Sanity check args */
1095         if (!serv)
1096                 return 0;
1097
1098         spin_lock_bh(&serv->sv_lock);
1099         list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) {
1100                 len = snprintf(xprt_str, sizeof(xprt_str),
1101                                "%s %d\n", xprt->xpt_class->xcl_name,
1102                                svc_xprt_local_port(xprt));
1103                 /* If the string was truncated, replace with error string */
1104                 if (len >= sizeof(xprt_str))
1105                         strcpy(xprt_str, "name-too-long\n");
1106                 /* Don't overflow buffer */
1107                 len = strlen(xprt_str);
1108                 if (buflen && (len + totlen >= buflen))
1109                         break;
1110                 strcpy(buf+totlen, xprt_str);
1111                 totlen += len;
1112         }
1113         spin_unlock_bh(&serv->sv_lock);
1114         return totlen;
1115 }
1116 EXPORT_SYMBOL_GPL(svc_xprt_names);