#ifndef _SOCK_H
#define _SOCK_H
-#include <linux/config.h>
#include <linux/list.h>
#include <linux/timer.h>
#include <linux/cache.h>
#include <linux/module.h>
+#include <linux/lockdep.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h> /* struct sk_buff */
#include <linux/security.h>
spinlock_t slock;
struct sock_iocb *owner;
wait_queue_head_t wq;
+ /*
+ * We express the mutex-alike socket_lock semantics
+ * to the lock validator by explicitly managing
+ * the slock as a lock variant (in addition to
+ * the slock itself):
+ */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
} socket_lock_t;
-#define sock_lock_init(__sk) \
-do { spin_lock_init(&((__sk)->sk_lock.slock)); \
- (__sk)->sk_lock.owner = NULL; \
- init_waitqueue_head(&((__sk)->sk_lock.wq)); \
-} while(0)
-
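/*
 * Hedged sketch: with the sock_lock_init() macro above removed, an
 * equivalent initializer can live out of line (the real replacement is
 * in net/core/sock.c, where it can also wire sk_lock.dep_map up to the
 * lock validator); example_socket_lock_init is an illustration, not
 * the kernel's code:
 */
static inline void example_socket_lock_init(socket_lock_t *sl)
{
	spin_lock_init(&sl->slock);	/* the underlying spinlock */
	sl->owner = NULL;		/* no owner: socket not held */
	init_waitqueue_head(&sl->wq);	/* sleepers from lock_sock() */
}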
struct sock;
struct proto;
* @skc_node: main hash linkage for various protocol lookup tables
* @skc_bind_node: bind hash linkage for various protocol lookup tables
* @skc_refcnt: reference count
+ * @skc_hash: hash value used with various protocol lookup tables
* @skc_prot: protocol handlers inside a network family
*
* This is the minimal network layer representation of sockets, the header
struct hlist_node skc_node;
struct hlist_node skc_bind_node;
atomic_t skc_refcnt;
+ unsigned int skc_hash;
struct proto *skc_prot;
};
* @sk_receive_queue: incoming packets
* @sk_wmem_alloc: transmit queue bytes committed
* @sk_write_queue: Packet sending queue
+ * @sk_async_wait_queue: DMA copied packets
* @sk_omem_alloc: "o" is "option" or "other"
* @sk_wmem_queued: persistent queue size
* @sk_forward_alloc: space allocated forward
* @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, %SO_OOBINLINE settings
* @sk_no_check: %SO_NO_CHECK setting, whether or not to checksum packets
* @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
+ * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
* @sk_lingertime: %SO_LINGER l_linger setting
- * @sk_hashent: hash entry in several tables (e.g. inet_hashinfo.ehash)
* @sk_backlog: always used with the per-socket spinlock held
* @sk_callback_lock: used with the callbacks in the end of this struct
* @sk_error_queue: rarely used
#define sk_node __sk_common.skc_node
#define sk_bind_node __sk_common.skc_bind_node
#define sk_refcnt __sk_common.skc_refcnt
+#define sk_hash __sk_common.skc_hash
#define sk_prot __sk_common.skc_prot
unsigned char sk_shutdown : 2,
sk_no_check : 2,
atomic_t sk_omem_alloc;
struct sk_buff_head sk_receive_queue;
struct sk_buff_head sk_write_queue;
+ struct sk_buff_head sk_async_wait_queue;
int sk_wmem_queued;
int sk_forward_alloc;
- unsigned int sk_allocation;
+ gfp_t sk_allocation;
int sk_sndbuf;
int sk_route_caps;
- int sk_hashent;
+ int sk_gso_type;
+ int sk_rcvlowat;
unsigned long sk_flags;
unsigned long sk_lingertime;
/*
unsigned short sk_max_ack_backlog;
__u32 sk_priority;
struct ucred sk_peercred;
- int sk_rcvlowat;
long sk_rcvtimeo;
long sk_sndtimeo;
struct sk_filter *sk_filter;
static inline int sk_hashed(const struct sock *sk)
{
- return sk->sk_node.pprev != NULL;
+ return !sk_unhashed(sk);
}
static __inline__ void sk_node_init(struct hlist_node *node)
SOCK_USE_WRITE_QUEUE, /* whether to call sk->sk_write_space in sock_wfree */
SOCK_DBG, /* %SO_DEBUG setting */
SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */
- SOCK_NO_LARGESEND, /* whether to sent large segments or not */
SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
};
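/*
 * Hedged usage sketch: these bits live in sk->sk_flags and are
 * accessed through the sock_flag()/sock_set_flag()/sock_reset_flag()
 * helpers (example_sock_is_debug is an illustration, not a kernel
 * helper):
 */
static inline int example_sock_is_debug(struct sock *sk)
{
	return sock_flag(sk, SOCK_DBG);	/* set via %SO_DEBUG setsockopt */
}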
static inline void sk_stream_free_skb(struct sock *sk, struct sk_buff *skb)
{
+ skb_truesize_check(skb);
sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
sk->sk_wmem_queued -= skb->truesize;
sk->sk_forward_alloc += skb->truesize;
}
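/*
 * Hedged sketch of the invariant skb_truesize_check() asserts (the
 * real helper is in linux/skbuff.h; this reconstruction is an
 * assumption): truesize must cover the skb head plus its payload,
 * otherwise the sk_wmem_queued/sk_forward_alloc adjustments above
 * would be fed a corrupt value.
 */
static inline void example_truesize_check(struct sk_buff *skb)
{
	if (unlikely((int)skb->truesize < sizeof(struct sk_buff) + skb->len))
		printk(KERN_ERR "SKB BUG: Invalid truesize (%u)\n",
		       skb->truesize);
}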
/* The per-socket spinlock must be held here. */
-#define sk_add_backlog(__sk, __skb) \
-do { if (!(__sk)->sk_backlog.tail) { \
- (__sk)->sk_backlog.head = \
- (__sk)->sk_backlog.tail = (__skb); \
- } else { \
- ((__sk)->sk_backlog.tail)->next = (__skb); \
- (__sk)->sk_backlog.tail = (__skb); \
- } \
- (__skb)->next = NULL; \
-} while(0)
+static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb)
+{
+ if (!sk->sk_backlog.tail) {
+ sk->sk_backlog.head = sk->sk_backlog.tail = skb;
+ } else {
+ sk->sk_backlog.tail->next = skb;
+ sk->sk_backlog.tail = skb;
+ }
+ skb->next = NULL;
+}
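/*
 * Hedged usage sketch, modeled on the TCP softirq receive path
 * (example_rcv is an illustration): the backlog is only touched under
 * the per-socket spinlock, and only when a process currently owns the
 * socket; release_sock() later replays the queued skbs.
 */
static inline void example_rcv(struct sock *sk, struct sk_buff *skb)
{
	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk))
		sk->sk_backlog_rcv(sk, skb);	/* process immediately */
	else
		sk_add_backlog(sk, skb);	/* replayed by release_sock() */
	bh_unlock_sock(sk);
}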
#define sk_wait_event(__sk, __timeo, __condition) \
({ int rc; \
release_sock(__sk); \
rc = __condition; \
if (!rc) { \
*(__timeo) = schedule_timeout(*(__timeo)); \
- rc = __condition; \
} \
lock_sock(__sk); \
+ rc = __condition; \
rc; \
})
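/*
 * Hedged usage sketch, modeled on sk_wait_data() declared just below
 * (simplified; the caller holds the socket lock, which sk_wait_event()
 * drops and retakes around the sleep, hence the re-check of the
 * condition after lock_sock()):
 */
static inline int example_wait_for_data(struct sock *sk, long *timeo)
{
	int rc;
	DEFINE_WAIT(wait);

	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
	rc = sk_wait_event(sk, timeo,
			   !skb_queue_empty(&sk->sk_receive_queue));
	finish_wait(sk->sk_sleep, &wait);
	return rc;
}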
extern int sk_wait_data(struct sock *sk, long *timeo);
struct request_sock_ops;
+struct timewait_sock_ops;
/* Networking protocol blocks we attach to sockets.
* socket layer -> transport layer interface
int (*getsockopt)(struct sock *sk, int level,
int optname, char __user *optval,
int __user *option);
+ int (*compat_setsockopt)(struct sock *sk,
+ int level,
+ int optname, char __user *optval,
+ int optlen);
+ int (*compat_getsockopt)(struct sock *sk,
+ int level,
+ int optname, char __user *optval,
+ int __user *option);
int (*sendmsg)(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t len);
int (*recvmsg)(struct kiocb *iocb, struct sock *sk,
kmem_cache_t *slab;
unsigned int obj_size;
- kmem_cache_t *twsk_slab;
- unsigned int twsk_obj_size;
atomic_t *orphan_count;
struct request_sock_ops *rsk_prot;
+ struct timewait_sock_ops *twsk_prot;
struct module *owner;
/* BH context may only use the following locking interface. */
#define bh_lock_sock(__sk) spin_lock(&((__sk)->sk_lock.slock))
+#define bh_lock_sock_nested(__sk) \
+ spin_lock_nested(&((__sk)->sk_lock.slock), \
+ SINGLE_DEPTH_NESTING)
#define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock))
extern struct sock *sk_alloc(int family,
- unsigned int __nocast priority,
+ gfp_t priority,
struct proto *prot, int zero_it);
extern void sk_free(struct sock *sk);
extern struct sock *sk_clone(const struct sock *sk,
- const unsigned int __nocast priority);
+ const gfp_t priority);
extern struct sk_buff *sock_wmalloc(struct sock *sk,
unsigned long size, int force,
- unsigned int __nocast priority);
+ gfp_t priority);
extern struct sk_buff *sock_rmalloc(struct sock *sk,
unsigned long size, int force,
- unsigned int __nocast priority);
+ gfp_t priority);
extern void sock_wfree(struct sk_buff *skb);
extern void sock_rfree(struct sk_buff *skb);
int noblock,
int *errcode);
extern void *sock_kmalloc(struct sock *sk, int size,
- unsigned int __nocast priority);
+ gfp_t priority);
extern void sock_kfree_s(struct sock *sk, void *mem, int size);
extern void sk_send_sigurg(struct sock *sk);
struct msghdr *msg, size_t size, int flags);
extern int sock_common_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, int optlen);
+extern int compat_sock_common_getsockopt(struct socket *sock, int level,
+ int optname, char __user *optval, int __user *optlen);
+extern int compat_sock_common_setsockopt(struct socket *sock, int level,
+ int optname, char __user *optval, int optlen);
extern void sk_common_release(struct sock *sk);
*
*/
-static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock)
+static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
{
int err;
+ struct sk_filter *filter;
err = security_sock_rcv_skb(sk, skb);
if (err)
return err;
- if (sk->sk_filter) {
- struct sk_filter *filter;
-
- if (needlock)
- bh_lock_sock(sk);
-
- filter = sk->sk_filter;
- if (filter) {
- int pkt_len = sk_run_filter(skb, filter->insns,
- filter->len);
- if (!pkt_len)
- err = -EPERM;
- else
- skb_trim(skb, pkt_len);
- }
-
- if (needlock)
- bh_unlock_sock(sk);
+ rcu_read_lock_bh();
+ filter = sk->sk_filter;
+ if (filter) {
+ unsigned int pkt_len = sk_run_filter(skb, filter->insns,
+ filter->len);
+ err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
}
+ rcu_read_unlock_bh();
+
return err;
}
* Remove a filter from a socket and release its resources.
*/
+static inline void sk_filter_rcu_free(struct rcu_head *rcu)
+{
+ struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
+ kfree(fp);
+}
+
static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp)
{
unsigned int size = sk_filter_len(fp);
atomic_sub(size, &sk->sk_omem_alloc);
if (atomic_dec_and_test(&fp->refcnt))
- kfree(fp);
+ call_rcu_bh(&fp->rcu, sk_filter_rcu_free);
}
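/*
 * Hedged sketch of the update side that pairs with the
 * rcu_read_lock_bh() reader in sk_filter() above (modeled on
 * sk_attach_filter() in net/core/filter.c; simplified, an assumption):
 * the old filter may still be in use by softirq readers, which is why
 * sk_filter_release() defers the kfree() through call_rcu_bh().
 */
static inline void example_swap_filter(struct sock *sk, struct sk_filter *fp)
{
	struct sk_filter *old_fp;

	rcu_read_lock_bh();
	old_fp = sk->sk_filter;
	rcu_assign_pointer(sk->sk_filter, fp);	/* publish the new filter */
	rcu_read_unlock_bh();

	if (old_fp)
		sk_filter_release(sk, old_fp);	/* freed via call_rcu_bh() */
}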
static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
sk_free(sk);
}
+extern int sk_receive_skb(struct sock *sk, struct sk_buff *skb);
+
/* Detach socket from process context.
* Announce socket dead, detach it from wait queue and inode.
* Note that parent inode held reference count on this struct sock,
sk->sk_sleep = &parent->wait;
parent->sk = sk;
sk->sk_socket = parent;
+ security_sock_graft(sk, parent);
write_unlock_bh(&sk->sk_callback_lock);
}
+static inline void sock_copy(struct sock *nsk, const struct sock *osk)
+{
+#ifdef CONFIG_SECURITY_NETWORK
+ void *sptr = nsk->sk_security;
+#endif
+
+ memcpy(nsk, osk, osk->sk_prot->obj_size);
+#ifdef CONFIG_SECURITY_NETWORK
+ nsk->sk_security = sptr;
+ security_sk_clone(osk, nsk);
+#endif
+}
+
extern int sock_i_uid(struct sock *sk);
extern unsigned long sock_i_ino(struct sock *sk);
write_unlock(&sk->sk_dst_lock);
}
-static inline struct dst_entry *
-__sk_dst_check(struct sock *sk, u32 cookie)
-{
- struct dst_entry *dst = sk->sk_dst_cache;
+extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie);
- if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
- sk->sk_dst_cache = NULL;
- dst_release(dst);
- return NULL;
- }
+extern struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie);
- return dst;
-}
-
-static inline struct dst_entry *
-sk_dst_check(struct sock *sk, u32 cookie)
+static inline int sk_can_gso(const struct sock *sk)
{
- struct dst_entry *dst = sk_dst_get(sk);
-
- if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
- sk_dst_reset(sk);
- dst_release(dst);
- return NULL;
- }
-
- return dst;
+ return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type);
}
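/*
 * Hedged sketch of the check behind sk_can_gso() (the real
 * net_gso_ok() lives in linux/netdevice.h; this reconstruction is an
 * assumption): the socket's GSO type is shifted into the device
 * feature space and must be fully covered by the route capabilities.
 */
static inline int example_net_gso_ok(int features, int gso_type)
{
	int feature = gso_type << NETIF_F_GSO_SHIFT;
	return (features & feature) == feature;
}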
static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
__sk_dst_set(sk, dst);
sk->sk_route_caps = dst->dev->features;
- if (sk->sk_route_caps & NETIF_F_TSO) {
- if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len)
- sk->sk_route_caps &= ~NETIF_F_TSO;
+ if (sk->sk_route_caps & NETIF_F_GSO)
+ sk->sk_route_caps |= NETIF_F_GSO_MASK;
+ if (sk_can_gso(sk)) {
+ if (dst->header_len)
+ sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+ else
+ sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
}
}
extern void sk_stop_timer(struct sock *sk, struct timer_list* timer);
-static inline int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
-{
- int err = 0;
- int skb_len;
-
- /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
- number of warnings when compiling with -W --ANK
- */
- if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
- (unsigned)sk->sk_rcvbuf) {
- err = -ENOMEM;
- goto out;
- }
-
- /* It would be deadlock, if sock_queue_rcv_skb is used
- with socket lock! We assume that users of this
- function are lock free.
- */
- err = sk_filter(sk, skb, 1);
- if (err)
- goto out;
-
- skb->dev = NULL;
- skb_set_owner_r(skb, sk);
-
- /* Cache the SKB length before we tack it onto the receive
- * queue. Once it is added it no longer belongs to us and
- * may be freed by other threads of control pulling packets
- * from the queue.
- */
- skb_len = skb->len;
-
- skb_queue_tail(&sk->sk_receive_queue, skb);
-
- if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk, skb_len);
-out:
- return err;
-}
+extern int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
static inline int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
{
static inline int sock_error(struct sock *sk)
{
- int err = xchg(&sk->sk_err, 0);
+ int err;
+ if (likely(!sk->sk_err))
+ return 0;
+ err = xchg(&sk->sk_err, 0);
return -err;
}
static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk,
int size, int mem,
- unsigned int __nocast gfp)
+ gfp_t gfp)
{
struct sk_buff *skb;
int hdr_len;
static inline struct sk_buff *sk_stream_alloc_skb(struct sock *sk,
int size,
- unsigned int __nocast gfp)
+ gfp_t gfp)
{
return sk_stream_alloc_pskb(sk, size, 0, gfp);
}
(skb != (struct sk_buff *)&(sk)->sk_write_queue); \
skb = skb->next)
+/* from STCP, for fast SACK processing */
+#define sk_stream_for_retrans_queue_from(skb, sk) \
+ for (; (skb != (sk)->sk_send_head) && \
+ (skb != (struct sk_buff *)&(sk)->sk_write_queue); \
+ skb = skb->next)
+
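/*
 * Hedged usage sketch: resume a write-queue walk at an already-located
 * skb instead of rescanning from the head, as a SACK scan would
 * (example_walk_from is an illustration, not kernel code):
 */
static inline void example_walk_from(struct sock *sk, struct sk_buff *skb)
{
	sk_stream_for_retrans_queue_from(skb, sk) {
		/* e.g. tag skb as SACKed or pick it for retransmit */
	}
}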
/*
* Default write policy as shown to user space via poll/select/SIGIO
*/
return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf / 2);
}
-static inline unsigned int __nocast gfp_any(void)
+static inline gfp_t gfp_any(void)
{
return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
}
* sk_eat_skb - Release a skb if it is no longer needed
* @sk: socket to eat this skb from
* @skb: socket buffer to eat
+ * @copied_early: flag indicating whether DMA operations copied this data early
*
* This routine must be called with interrupts disabled or with the socket
* locked so that the sk_buff queue operation is ok.
*/
-static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb)
+#ifdef CONFIG_NET_DMA
+static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_early)
+{
+ __skb_unlink(skb, &sk->sk_receive_queue);
+ if (!copied_early)
+ __kfree_skb(skb);
+ else
+ __skb_queue_tail(&sk->sk_async_wait_queue, skb);
+}
+#else
+static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_early)
{
__skb_unlink(skb, &sk->sk_receive_queue);
__kfree_skb(skb);
}
+#endif
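/*
 * Hedged sketch of the CONFIG_NET_DMA cleanup side (modeled loosely on
 * the tcp_recvmsg() exit path; example_drain_async_queue is an
 * assumption): once the early DMA copies have completed, the skbs
 * parked on sk_async_wait_queue can finally be freed.
 */
static inline void example_drain_async_queue(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(&sk->sk_async_wait_queue)) != NULL)
		__kfree_skb(skb);
}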
extern void sock_enable_timestamp(struct sock *sk);
extern int sock_get_timestamp(struct sock *, struct timeval __user *);
* Enable debug/info messages
*/
-#if 0
-#define NETDEBUG(fmt, args...) do { } while (0)
-#define LIMIT_NETDEBUG(fmt, args...) do { } while(0)
-#else
+#ifdef CONFIG_NETDEBUG
#define NETDEBUG(fmt, args...) printk(fmt,##args)
#define LIMIT_NETDEBUG(fmt, args...) do { if (net_ratelimit()) printk(fmt,##args); } while(0)
+#else
+#define NETDEBUG(fmt, args...) do { } while (0)
+#define LIMIT_NETDEBUG(fmt, args...) do { } while(0)
#endif
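/*
 * Hedged usage sketch: LIMIT_NETDEBUG() is intended for per-packet
 * paths where an unthrottled printk() could flood the log
 * (example_report_bad_packet is an illustration):
 */
static inline void example_report_bad_packet(struct sk_buff *skb)
{
	LIMIT_NETDEBUG(KERN_DEBUG "example: malformed packet, len=%u\n",
		       skb->len);
}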
/*