* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*/
+#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <net/sock.h>
#include <net/checksum.h>
#include <net/ip.h>
+#include <net/ipv6.h>
#include <net/tcp_states.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
* svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt.
* when both need to be taken (rare), svc_serv->sv_lock is first.
* BKL protects svc_serv->sv_nrthread.
- * svc_sock->sk_defer_lock protects the svc_sock->sk_deferred list
+ * svc_sock->sk_lock protects the svc_sock->sk_deferred list
+ * and the ->sk_info_authunix cache.
* svc_sock->sk_flags.SK_BUSY prevents a svc_sock being enqueued multiply.
*
* Some flags can be set to certain values at any time
*
*/
-#define RPCDBG_FACILITY RPCDBG_SVCSOCK
+#define RPCDBG_FACILITY RPCDBG_SVCXPRT
static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
static void svc_udp_data_ready(struct sock *, int);
static int svc_udp_recvfrom(struct svc_rqst *);
static int svc_udp_sendto(struct svc_rqst *);
+static void svc_close_socket(struct svc_sock *svsk);
static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
static int svc_deferred_recv(struct svc_rqst *rqstp);
static inline void svc_reclassify_socket(struct socket *sock)
{
struct sock *sk = sock->sk;
- BUG_ON(sk->sk_lock.owner != NULL);
+ BUG_ON(sock_owned_by_user(sk));
switch (sk->sk_family) {
case AF_INET:
sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD",
case AF_INET:
snprintf(buf, len, "%u.%u.%u.%u, port=%u",
NIPQUAD(((struct sockaddr_in *) addr)->sin_addr),
- htons(((struct sockaddr_in *) addr)->sin_port));
+ ntohs(((struct sockaddr_in *) addr)->sin_port));
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+
case AF_INET6:
snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u",
NIP6(((struct sockaddr_in6 *) addr)->sin6_addr),
- htons(((struct sockaddr_in6 *) addr)->sin6_port));
+ ntohs(((struct sockaddr_in6 *) addr)->sin6_port));
break;
-#endif
+
default:
snprintf(buf, len, "unknown address type: %d", addr->sa_family);
break;
}
}
+union svc_pktinfo_u {
+ struct in_pktinfo pkti;
+ struct in6_pktinfo pkti6;
+};
+#define SVC_PKTINFO_SPACE \
+ CMSG_SPACE(sizeof(union svc_pktinfo_u))
+
+static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
+{
+ switch (rqstp->rq_sock->sk_sk->sk_family) {
+ case AF_INET: {
+ struct in_pktinfo *pki = CMSG_DATA(cmh);
+
+ cmh->cmsg_level = SOL_IP;
+ cmh->cmsg_type = IP_PKTINFO;
+ pki->ipi_ifindex = 0;
+ pki->ipi_spec_dst.s_addr = rqstp->rq_daddr.addr.s_addr;
+ cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
+ }
+ break;
+
+ case AF_INET6: {
+ struct in6_pktinfo *pki = CMSG_DATA(cmh);
+
+ cmh->cmsg_level = SOL_IPV6;
+ cmh->cmsg_type = IPV6_PKTINFO;
+ pki->ipi6_ifindex = 0;
+ ipv6_addr_copy(&pki->ipi6_addr,
+ &rqstp->rq_daddr.addr6);
+ cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
+ }
+ break;
+ }
+ return;
+}
+
/*
* Generic sendto routine
*/
struct svc_sock *svsk = rqstp->rq_sock;
struct socket *sock = svsk->sk_sock;
int slen;
- char buffer[CMSG_SPACE(sizeof(struct in_pktinfo))];
- struct cmsghdr *cmh = (struct cmsghdr *)buffer;
- struct in_pktinfo *pki = (struct in_pktinfo *)CMSG_DATA(cmh);
+ union {
+ struct cmsghdr hdr;
+ long all[SVC_PKTINFO_SPACE / sizeof(long)];
+ } buffer;
+ struct cmsghdr *cmh = &buffer.hdr;
int len = 0;
int result;
int size;
slen = xdr->len;
if (rqstp->rq_prot == IPPROTO_UDP) {
- /* set the source and destination */
- struct msghdr msg;
- msg.msg_name = &rqstp->rq_addr;
- msg.msg_namelen = rqstp->rq_addrlen;
- msg.msg_iov = NULL;
- msg.msg_iovlen = 0;
- msg.msg_flags = MSG_MORE;
-
- msg.msg_control = cmh;
- msg.msg_controllen = sizeof(buffer);
- cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
- cmh->cmsg_level = SOL_IP;
- cmh->cmsg_type = IP_PKTINFO;
- pki->ipi_ifindex = 0;
- pki->ipi_spec_dst.s_addr = rqstp->rq_daddr.addr.s_addr;
+ struct msghdr msg = {
+ .msg_name = &rqstp->rq_addr,
+ .msg_namelen = rqstp->rq_addrlen,
+ .msg_control = cmh,
+ .msg_controllen = sizeof(buffer),
+ .msg_flags = MSG_MORE,
+ };
+
+ svc_set_cmsg_data(rqstp, cmh);
if (sock_sendmsg(sock, &msg, 0) < 0)
goto out;
struct msghdr msg = {
.msg_flags = MSG_DONTWAIT,
};
+ struct sockaddr *sin;
int len;
len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
memcpy(&rqstp->rq_addr, &svsk->sk_remote, svsk->sk_remotelen);
rqstp->rq_addrlen = svsk->sk_remotelen;
+ /* Destination address in request is needed for binding the
+ * source address in RPC callbacks later.
+ */
+ sin = (struct sockaddr *)&svsk->sk_local;
+ switch (sin->sa_family) {
+ case AF_INET:
+ rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr;
+ break;
+ case AF_INET6:
+ rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr;
+ break;
+ }
+
dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
svsk, iov[0].iov_base, iov[0].iov_len, len);
}
}
+static inline void svc_udp_get_dest_address(struct svc_rqst *rqstp,
+ struct cmsghdr *cmh)
+{
+ switch (rqstp->rq_sock->sk_sk->sk_family) {
+ case AF_INET: {
+ struct in_pktinfo *pki = CMSG_DATA(cmh);
+ rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr;
+ break;
+ }
+ case AF_INET6: {
+ struct in6_pktinfo *pki = CMSG_DATA(cmh);
+ ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr);
+ break;
+ }
+ }
+}
+
/*
* Receive a datagram from a UDP socket.
*/
static int
svc_udp_recvfrom(struct svc_rqst *rqstp)
{
- struct sockaddr_in *sin = svc_addr_in(rqstp);
struct svc_sock *svsk = rqstp->rq_sock;
struct svc_serv *serv = svsk->sk_server;
struct sk_buff *skb;
+ union {
+ struct cmsghdr hdr;
+ long all[SVC_PKTINFO_SPACE / sizeof(long)];
+ } buffer;
+ struct cmsghdr *cmh = &buffer.hdr;
int err, len;
+ struct msghdr msg = {
+ .msg_name = svc_addr(rqstp),
+ .msg_control = cmh,
+ .msg_controllen = sizeof(buffer),
+ .msg_flags = MSG_DONTWAIT,
+ };
if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
/* udp sockets need large rcvbuf as all pending
}
clear_bit(SK_DATA, &svsk->sk_flags);
- while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
- if (err == -EAGAIN) {
- svc_sock_received(svsk);
- return err;
+ skb = NULL;
+ err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
+ 0, 0, MSG_PEEK | MSG_DONTWAIT);
+ if (err >= 0)
+ skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err);
+
+ if (skb == NULL) {
+ if (err != -EAGAIN) {
+ /* possibly an icmp error */
+ dprintk("svc: recvfrom returned error %d\n", -err);
+ set_bit(SK_DATA, &svsk->sk_flags);
}
- /* possibly an icmp error */
- dprintk("svc: recvfrom returned error %d\n", -err);
+ svc_sock_received(svsk);
+ return -EAGAIN;
}
- if (skb->tstamp.off_sec == 0) {
- struct timeval tv;
-
- tv.tv_sec = xtime.tv_sec;
- tv.tv_usec = xtime.tv_nsec / NSEC_PER_USEC;
- skb_set_timestamp(skb, &tv);
+ rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
+ if (skb->tstamp.tv64 == 0) {
+ skb->tstamp = ktime_get_real();
/* Don't enable netstamp, sunrpc doesn't
need that much accuracy */
}
- skb_get_timestamp(skb, &svsk->sk_sk->sk_stamp);
+ svsk->sk_sk->sk_stamp = skb->tstamp;
set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */
/*
len = skb->len - sizeof(struct udphdr);
rqstp->rq_arg.len = len;
- rqstp->rq_prot = IPPROTO_UDP;
-
- /* Get sender address */
- sin->sin_family = AF_INET;
- sin->sin_port = skb->h.uh->source;
- sin->sin_addr.s_addr = skb->nh.iph->saddr;
- rqstp->rq_addrlen = sizeof(struct sockaddr_in);
+ rqstp->rq_prot = IPPROTO_UDP;
- /* Remember which interface received this request */
- rqstp->rq_daddr.addr.s_addr = skb->nh.iph->daddr;
+ if (cmh->cmsg_level != IPPROTO_IP ||
+ cmh->cmsg_type != IP_PKTINFO) {
+ if (net_ratelimit())
+ printk("rpcsvc: received unknown control message:"
+ "%d/%d\n",
+ cmh->cmsg_level, cmh->cmsg_type);
+ skb_free_datagram(svsk->sk_sk, skb);
+ return 0;
+ }
+ svc_udp_get_dest_address(rqstp, cmh);
if (skb_is_nonlinear(skb)) {
/* we have to copy */
} else {
rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
rqstp->rq_respages = rqstp->rq_pages + 1 +
- (rqstp->rq_arg.page_len + PAGE_SIZE - 1)/ PAGE_SIZE;
+ DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE);
}
if (serv->sv_stats)
return error;
}
+static struct svc_xprt_ops svc_udp_ops = {
+ .xpo_recvfrom = svc_udp_recvfrom,
+ .xpo_sendto = svc_udp_sendto,
+};
+
+static struct svc_xprt_class svc_udp_class = {
+ .xcl_name = "udp",
+ .xcl_ops = &svc_udp_ops,
+ .xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP,
+};
+
static void
svc_udp_init(struct svc_sock *svsk)
{
+ int one = 1;
+ mm_segment_t oldfs;
+
+ svc_xprt_init(&svc_udp_class, &svsk->sk_xprt);
svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
svsk->sk_sk->sk_write_space = svc_write_space;
- svsk->sk_recvfrom = svc_udp_recvfrom;
- svsk->sk_sendto = svc_udp_sendto;
/* initialise setting must have enough space to
* receive and respond to one request.
set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */
set_bit(SK_CHNGBUF, &svsk->sk_flags);
+
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ /* make sure we get destination address info */
+ svsk->sk_sock->ops->setsockopt(svsk->sk_sock, IPPROTO_IP, IP_PKTINFO,
+ (char __user *)&one, sizeof(one));
+ set_fs(oldfs);
}
/*
wake_up_interruptible(sk->sk_sleep);
}
+static inline int svc_port_is_privileged(struct sockaddr *sin)
+{
+ switch (sin->sa_family) {
+ case AF_INET:
+ return ntohs(((struct sockaddr_in *)sin)->sin_port)
+ < PROT_SOCK;
+ case AF_INET6:
+ return ntohs(((struct sockaddr_in6 *)sin)->sin6_port)
+ < PROT_SOCK;
+ default:
+ return 0;
+ }
+}
+
/*
* Accept a TCP connection
*/
static void
svc_tcp_accept(struct svc_sock *svsk)
{
- struct sockaddr_in sin;
+ struct sockaddr_storage addr;
+ struct sockaddr *sin = (struct sockaddr *) &addr;
struct svc_serv *serv = svsk->sk_server;
struct socket *sock = svsk->sk_sock;
struct socket *newsock;
set_bit(SK_CONN, &svsk->sk_flags);
svc_sock_enqueue(svsk);
- slen = sizeof(sin);
- err = kernel_getpeername(newsock, (struct sockaddr *) &sin, &slen);
+ err = kernel_getpeername(newsock, sin, &slen);
if (err < 0) {
if (net_ratelimit())
printk(KERN_WARNING "%s: peername failed (err %d)!\n",
* hosts here, but when we get encryption, the IP of the host won't
* tell us anything. For now just warn about unpriv connections.
*/
- if (ntohs(sin.sin_port) >= 1024) {
+ if (!svc_port_is_privileged(sin)) {
dprintk(KERN_WARNING
"%s: connect from unprivileged port: %s\n",
serv->sv_name,
- __svc_print_addr((struct sockaddr *) &sin, buf,
- sizeof(buf)));
+ __svc_print_addr(sin, buf, sizeof(buf)));
}
dprintk("%s: connect from %s\n", serv->sv_name,
- __svc_print_addr((struct sockaddr *) &sin, buf,
- sizeof(buf)));
+ __svc_print_addr(sin, buf, sizeof(buf)));
/* make sure that a write doesn't block forever when
* low on memory
if (!(newsvsk = svc_setup_socket(serv, newsock, &err,
(SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY))))
goto failed;
- memcpy(&newsvsk->sk_remote, &sin, slen);
+ memcpy(&newsvsk->sk_remote, sin, slen);
newsvsk->sk_remotelen = slen;
+ err = kernel_getsockname(newsock, sin, &slen);
+ if (unlikely(err < 0)) {
+ dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
+ slen = offsetof(struct sockaddr, sa_data);
+ }
+ memcpy(&newsvsk->sk_local, sin, slen);
svc_sock_received(newsvsk);
serv->sv_name);
printk(KERN_NOTICE
"%s: last TCP connect from %s\n",
- serv->sv_name, buf);
+ serv->sv_name, __svc_print_addr(sin,
+ buf, sizeof(buf)));
}
/*
* Always select the oldest socket. It's not fair,
return sent;
}
+static struct svc_xprt_ops svc_tcp_ops = {
+ .xpo_recvfrom = svc_tcp_recvfrom,
+ .xpo_sendto = svc_tcp_sendto,
+};
+
+static struct svc_xprt_class svc_tcp_class = {
+ .xcl_name = "tcp",
+ .xcl_ops = &svc_tcp_ops,
+ .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
+};
+
+void svc_init_xprt_sock(void)
+{
+ svc_reg_xprt_class(&svc_tcp_class);
+ svc_reg_xprt_class(&svc_udp_class);
+}
+
+void svc_cleanup_xprt_sock(void)
+{
+ svc_unreg_xprt_class(&svc_tcp_class);
+ svc_unreg_xprt_class(&svc_udp_class);
+}
+
static void
svc_tcp_init(struct svc_sock *svsk)
{
struct sock *sk = svsk->sk_sk;
struct tcp_sock *tp = tcp_sk(sk);
- svsk->sk_recvfrom = svc_tcp_recvfrom;
- svsk->sk_sendto = svc_tcp_sendto;
+ svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt);
if (sk->sk_state == TCP_LISTEN) {
dprintk("setting up TCP socket for listening\n");
svc_recv(struct svc_rqst *rqstp, long timeout)
{
struct svc_sock *svsk = NULL;
- struct sockaddr_in *sin = svc_addr_in(rqstp);
struct svc_serv *serv = rqstp->rq_server;
struct svc_pool *pool = rqstp->rq_pool;
int len, i;
dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse));
- len = svsk->sk_recvfrom(rqstp);
+ len = svsk->sk_xprt.xpt_ops->xpo_recvfrom(rqstp);
dprintk("svc: got len=%d\n", len);
/* No data, incomplete (TCP) read, or accept() */
svsk->sk_lastrecv = get_seconds();
clear_bit(SK_OLD, &svsk->sk_flags);
- rqstp->rq_secure = ntohs(sin->sin_port) < PROT_SOCK;
+ rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
rqstp->rq_chandle.defer = svc_defer;
if (serv->sv_stats)
if (test_bit(SK_DEAD, &svsk->sk_flags))
len = -ENOTCONN;
else
- len = svsk->sk_sendto(rqstp);
+ len = svsk->sk_xprt.xpt_ops->xpo_sendto(rqstp);
mutex_unlock(&svsk->sk_mutex);
svc_sock_release(rqstp);
if (!test_and_set_bit(SK_OLD, &svsk->sk_flags))
continue;
- if (atomic_read(&svsk->sk_inuse) || test_bit(SK_BUSY, &svsk->sk_flags))
+ if (atomic_read(&svsk->sk_inuse) > 1 || test_bit(SK_BUSY, &svsk->sk_flags))
continue;
atomic_inc(&svsk->sk_inuse);
list_move(le, &to_be_aged);
svsk->sk_server = serv;
atomic_set(&svsk->sk_inuse, 1);
svsk->sk_lastrecv = get_seconds();
- spin_lock_init(&svsk->sk_defer_lock);
+ spin_lock_init(&svsk->sk_lock);
INIT_LIST_HEAD(&svsk->sk_deferred);
INIT_LIST_HEAD(&svsk->sk_ready);
mutex_init(&svsk->sk_mutex);
* Create socket for RPC service.
*/
static int svc_create_socket(struct svc_serv *serv, int protocol,
- struct sockaddr_in *sin, int flags)
+ struct sockaddr *sin, int len, int flags)
{
struct svc_sock *svsk;
struct socket *sock;
dprintk("svc: svc_create_socket(%s, %d, %s)\n",
serv->sv_program->pg_name, protocol,
- __svc_print_addr((struct sockaddr *) sin, buf,
- sizeof(buf)));
+ __svc_print_addr(sin, buf, sizeof(buf)));
if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
printk(KERN_WARNING "svc: only UDP and TCP "
}
type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
- if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0)
+ error = sock_create_kern(sin->sa_family, type, protocol, &sock);
+ if (error < 0)
return error;
svc_reclassify_socket(sock);
if (type == SOCK_STREAM)
- sock->sk->sk_reuse = 1; /* allow address reuse */
- error = kernel_bind(sock, (struct sockaddr *) sin,
- sizeof(*sin));
+ sock->sk->sk_reuse = 1; /* allow address reuse */
+ error = kernel_bind(sock, sin, len);
if (error < 0)
goto bummer;
spin_unlock_bh(&serv->sv_lock);
}
-void svc_close_socket(struct svc_sock *svsk)
+static void svc_close_socket(struct svc_sock *svsk)
{
set_bit(SK_CLOSE, &svsk->sk_flags);
if (test_and_set_bit(SK_BUSY, &svsk->sk_flags))
svc_sock_put(svsk);
}
+void svc_force_close_socket(struct svc_sock *svsk)
+{
+ set_bit(SK_CLOSE, &svsk->sk_flags);
+ if (test_bit(SK_BUSY, &svsk->sk_flags)) {
+ /* Waiting to be processed, but no threads left,
+ * So just remove it from the waiting list
+ */
+ list_del_init(&svsk->sk_ready);
+ clear_bit(SK_BUSY, &svsk->sk_flags);
+ }
+ svc_close_socket(svsk);
+}
+
/**
* svc_makesock - Make a socket for nfsd and lockd
* @serv: RPC server structure
};
dprintk("svc: creating socket proto = %d\n", protocol);
- return svc_create_socket(serv, protocol, &sin, flags);
+ return svc_create_socket(serv, protocol, (struct sockaddr *) &sin,
+ sizeof(sin), flags);
}
/*
dprintk("revisit queued\n");
svsk = dr->svsk;
dr->svsk = NULL;
- spin_lock_bh(&svsk->sk_defer_lock);
+ spin_lock(&svsk->sk_lock);
list_add(&dr->handle.recent, &svsk->sk_deferred);
- spin_unlock_bh(&svsk->sk_defer_lock);
+ spin_unlock(&svsk->sk_lock);
set_bit(SK_DEFERRED, &svsk->sk_flags);
svc_sock_enqueue(svsk);
svc_sock_put(svsk);
if (!test_bit(SK_DEFERRED, &svsk->sk_flags))
return NULL;
- spin_lock_bh(&svsk->sk_defer_lock);
+ spin_lock(&svsk->sk_lock);
clear_bit(SK_DEFERRED, &svsk->sk_flags);
if (!list_empty(&svsk->sk_deferred)) {
dr = list_entry(svsk->sk_deferred.next,
list_del_init(&dr->handle.recent);
set_bit(SK_DEFERRED, &svsk->sk_flags);
}
- spin_unlock_bh(&svsk->sk_defer_lock);
+ spin_unlock(&svsk->sk_lock);
return dr;
}