diff --git a/include/net/sock.h b/include/net/sock.h
index b433b1e..4bb1ff9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -42,17 +42,18 @@
 
 #include <linux/kernel.h>
 #include <linux/list.h>
+#include <linux/list_nulls.h>
 #include <linux/timer.h>
 #include <linux/cache.h>
 #include <linux/module.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
-#include <linux/pcounter.h>
 #include <linux/skbuff.h>	/* struct sk_buff */
 #include <linux/mm.h>
 #include <linux/security.h>
 
 #include <linux/filter.h>
+#include <linux/rculist_nulls.h>
 
 #include <asm/atomic.h>
 #include <net/dst.h>
@@ -107,6 +108,7 @@ struct net;
  *	@skc_reuse: %SO_REUSEADDR setting
  *	@skc_bound_dev_if: bound device index if != 0
  *	@skc_node: main hash linkage for various protocol lookup tables
+ *	@skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol
  *	@skc_bind_node: bind hash linkage for various protocol lookup tables
  *	@skc_refcnt: reference count
  *	@skc_hash: hash value used with various protocol lookup tables
@@ -121,12 +123,17 @@ struct sock_common {
 	volatile unsigned char	skc_state;
 	unsigned char		skc_reuse;
 	int			skc_bound_dev_if;
-	struct hlist_node	skc_node;
+	union {
+		struct hlist_node	skc_node;
+		struct hlist_nulls_node skc_nulls_node;
+	};
 	struct hlist_node	skc_bind_node;
 	atomic_t		skc_refcnt;
 	unsigned int		skc_hash;
 	struct proto		*skc_prot;
+#ifdef CONFIG_NET_NS
 	struct net		*skc_net;
+#endif
 };
 
 /**
@@ -151,7 +158,7 @@ struct sock_common {
  *	@sk_allocation: allocation mode
  *	@sk_sndbuf: size of send buffer in bytes
  *	@sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
- *		   %SO_OOBINLINE settings
+ *		   %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
  *	@sk_no_check: %SO_NO_CHECK setting, whether or not to checksum packets
  *	@sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
  *	@sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
@@ -165,7 +172,7 @@ struct sock_common {
  *	@sk_err: last error
  *	@sk_err_soft: errors that don't cause failure but are the cause of a
  *		      persistent failure not just 'timed out'
- *	@sk_drops: raw drops counter
+ *	@sk_drops: raw/udp drops counter
  *	@sk_ack_backlog: current listen backlog
  *	@sk_max_ack_backlog: listen backlog set in listen()
  *	@sk_priority: %SO_PRIORITY setting
@@ -205,6 +212,7 @@ struct sock {
 #define sk_reuse		__sk_common.skc_reuse
 #define sk_bound_dev_if		__sk_common.skc_bound_dev_if
 #define sk_node			__sk_common.skc_node
+#define sk_nulls_node		__sk_common.skc_nulls_node
 #define sk_bind_node		__sk_common.skc_bind_node
 #define sk_refcnt		__sk_common.skc_refcnt
 #define sk_hash			__sk_common.skc_hash
@@ -228,7 +236,9 @@ struct sock {
 	} sk_backlog;
 	wait_queue_head_t	*sk_sleep;
 	struct dst_entry	*sk_dst_cache;
+#ifdef CONFIG_XFRM
 	struct xfrm_policy	*sk_policy[2];
+#endif
 	rwlock_t		sk_dst_lock;
 	atomic_t		sk_rmem_alloc;
 	atomic_t		sk_wmem_alloc;
@@ -236,7 +246,9 @@ struct sock {
 	int			sk_sndbuf;
 	struct sk_buff_head	sk_receive_queue;
 	struct sk_buff_head	sk_write_queue;
+#ifdef CONFIG_NET_DMA
 	struct sk_buff_head	sk_async_wait_queue;
+#endif
 	int			sk_wmem_queued;
 	int			sk_forward_alloc;
 	gfp_t			sk_allocation;
@@ -268,7 +280,9 @@ struct sock {
 	struct sk_buff		*sk_send_head;
 	__u32			sk_sndmsg_off;
 	int			sk_write_pending;
+#ifdef CONFIG_SECURITY
 	void			*sk_security;
+#endif
 	__u32			sk_mark;
 	/* XXX 4 bytes hole on 64 bit */
 	void			(*sk_state_change)(struct sock *sk);
@@ -293,12 +307,30 @@ static inline struct sock *sk_head(const struct hlist_head *head)
 	return hlist_empty(head) ?
NULL : __sk_head(head); } +static inline struct sock *__sk_nulls_head(const struct hlist_nulls_head *head) +{ + return hlist_nulls_entry(head->first, struct sock, sk_nulls_node); +} + +static inline struct sock *sk_nulls_head(const struct hlist_nulls_head *head) +{ + return hlist_nulls_empty(head) ? NULL : __sk_nulls_head(head); +} + static inline struct sock *sk_next(const struct sock *sk) { return sk->sk_node.next ? hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL; } +static inline struct sock *sk_nulls_next(const struct sock *sk) +{ + return (!is_a_nulls(sk->sk_nulls_node.next)) ? + hlist_nulls_entry(sk->sk_nulls_node.next, + struct sock, sk_nulls_node) : + NULL; +} + static inline int sk_unhashed(const struct sock *sk) { return hlist_unhashed(&sk->sk_node); @@ -314,6 +346,11 @@ static __inline__ void sk_node_init(struct hlist_node *node) node->pprev = NULL; } +static __inline__ void sk_nulls_node_init(struct hlist_nulls_node *node) +{ + node->pprev = NULL; +} + static __inline__ void __sk_del_node(struct sock *sk) { __hlist_del(&sk->sk_node); @@ -360,6 +397,27 @@ static __inline__ int sk_del_node_init(struct sock *sk) return rc; } +static __inline__ int __sk_nulls_del_node_init_rcu(struct sock *sk) +{ + if (sk_hashed(sk)) { + hlist_nulls_del_init_rcu(&sk->sk_nulls_node); + return 1; + } + return 0; +} + +static __inline__ int sk_nulls_del_node_init_rcu(struct sock *sk) +{ + int rc = __sk_nulls_del_node_init_rcu(sk); + + if (rc) { + /* paranoid for a while -acme */ + WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + __sock_put(sk); + } + return rc; +} + static __inline__ void __sk_add_node(struct sock *sk, struct hlist_head *list) { hlist_add_head(&sk->sk_node, list); @@ -371,6 +429,17 @@ static __inline__ void sk_add_node(struct sock *sk, struct hlist_head *list) __sk_add_node(sk, list); } +static __inline__ void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) +{ + hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); +} + +static __inline__ void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) +{ + sock_hold(sk); + __sk_nulls_add_node_rcu(sk, list); +} + static __inline__ void __sk_del_bind_node(struct sock *sk) { __hlist_del(&sk->sk_bind_node); @@ -384,9 +453,16 @@ static __inline__ void sk_add_bind_node(struct sock *sk, #define sk_for_each(__sk, node, list) \ hlist_for_each_entry(__sk, node, list, sk_node) +#define sk_nulls_for_each(__sk, node, list) \ + hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node) +#define sk_nulls_for_each_rcu(__sk, node, list) \ + hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node) #define sk_for_each_from(__sk, node) \ if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ hlist_for_each_entry_from(__sk, node, sk_node) +#define sk_nulls_for_each_from(__sk, node) \ + if (__sk && ({ node = &(__sk)->sk_nulls_node; 1; })) \ + hlist_nulls_for_each_entry_from(__sk, node, sk_nulls_node) #define sk_for_each_continue(__sk, node) \ if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ hlist_for_each_entry_continue(__sk, node, sk_node) @@ -412,6 +488,13 @@ enum sock_flags { SOCK_RCVTSTAMPNS, /* %SO_TIMESTAMPNS setting */ SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */ SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */ + SOCK_TIMESTAMPING_TX_HARDWARE, /* %SOF_TIMESTAMPING_TX_HARDWARE */ + SOCK_TIMESTAMPING_TX_SOFTWARE, /* %SOF_TIMESTAMPING_TX_SOFTWARE */ + SOCK_TIMESTAMPING_RX_HARDWARE, /* %SOF_TIMESTAMPING_RX_HARDWARE */ + SOCK_TIMESTAMPING_RX_SOFTWARE, /* 
%SOF_TIMESTAMPING_RX_SOFTWARE */ + SOCK_TIMESTAMPING_SOFTWARE, /* %SOF_TIMESTAMPING_SOFTWARE */ + SOCK_TIMESTAMPING_RAW_HARDWARE, /* %SOF_TIMESTAMPING_RAW_HARDWARE */ + SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */ }; static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) @@ -481,6 +564,11 @@ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) skb->next = NULL; } +static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) +{ + return sk->sk_backlog_rcv(sk, skb); +} + #define sk_wait_event(__sk, __timeo, __condition) \ ({ int __rc; \ release_sock(__sk); \ @@ -523,7 +611,7 @@ struct proto { int (*ioctl)(struct sock *sk, int cmd, unsigned long arg); int (*init)(struct sock *sk); - int (*destroy)(struct sock *sk); + void (*destroy)(struct sock *sk); void (*shutdown)(struct sock *sk, int how); int (*setsockopt)(struct sock *sk, int level, int optname, char __user *optval, @@ -531,6 +619,7 @@ struct proto { int (*getsockopt)(struct sock *sk, int level, int optname, char __user *optval, int __user *option); +#ifdef CONFIG_COMPAT int (*compat_setsockopt)(struct sock *sk, int level, int optname, char __user *optval, @@ -539,6 +628,7 @@ struct proto { int level, int optname, char __user *optval, int __user *option); +#endif int (*sendmsg)(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len); int (*recvmsg)(struct kiocb *iocb, struct sock *sk, @@ -560,13 +650,13 @@ struct proto { /* Keeping track of sockets in use */ #ifdef CONFIG_PROC_FS - struct pcounter inuse; + unsigned int inuse_idx; #endif /* Memory pressure */ - void (*enter_memory_pressure)(void); + void (*enter_memory_pressure)(struct sock *sk); atomic_t *memory_allocated; /* Current allocated memory. */ - atomic_t *sockets_allocated; /* Current number of sockets. */ + struct percpu_counter *sockets_allocated; /* Current number of sockets. */ /* * Pressure flag: try to collapse. * Technical note: it is used by multiple contexts non atomically. 
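
The skc_node/skc_nulls_node union and the sk_nulls_for_each_rcu() iterators added above exist to support lockless socket lookup under RCU. An hlist_nulls chain is terminated not by NULL but by a "nulls" marker value that encodes the hash bucket it closes, so a reader that raced with a socket being unhashed and re-inserted into another chain can detect that it drifted and restart the walk. Below is a minimal sketch of the lookup pattern these primitives enable, modeled on the UDP lookup code of this kernel generation; nulls_lookup_example() and the simplified match() are illustrative placeholders, while the list, RCU and refcount calls are the real in-kernel APIs.

	/*
	 * Illustrative sketch (not part of this patch): lockless socket
	 * lookup on an RCU "nulls" list.
	 */
	#include <net/sock.h>
	#include <net/inet_sock.h>

	static bool match(const struct sock *sk, __be16 dport)
	{
		/* Real lookups also compare addresses, bound device and netns. */
		return inet_sk(sk)->dport == dport;
	}

	static struct sock *nulls_lookup_example(struct hlist_nulls_head *head,
						 unsigned int slot, __be16 dport)
	{
		struct sock *sk;
		const struct hlist_nulls_node *node;

		rcu_read_lock();
	begin:
		sk_nulls_for_each_rcu(sk, node, head) {
			if (match(sk, dport)) {
				/*
				 * The socket may be freed and recycled under
				 * us; only a successful refcount grab makes
				 * the match trustworthy.
				 */
				if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
					goto begin;
				rcu_read_unlock();
				return sk;
			}
		}
		/*
		 * The "nulls" value that terminates a chain encodes the bucket
		 * it belongs to.  Ending in a different bucket means a
		 * concurrent rehash moved our cursor and the walk may have
		 * skipped entries: restart rather than report a false miss.
		 */
		if (get_nulls_value(node) != slot)
			goto begin;
		rcu_read_unlock();
		return NULL;
	}

The atomic_inc_not_zero() step is what makes this safe together with SLAB_DESTROY_BY_RCU (note the new slab_flags field in struct proto below): a socket can be freed and immediately reused without waiting for a grace period, and only a successful refcount acquisition proves the entry is still the one that matched.
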
@@ -579,17 +669,18 @@ struct proto { int *sysctl_rmem; int max_header; - struct kmem_cache *slab; + struct kmem_cache *slab; unsigned int obj_size; + int slab_flags; - atomic_t *orphan_count; + struct percpu_counter *orphan_count; struct request_sock_ops *rsk_prot; struct timewait_sock_ops *twsk_prot; union { struct inet_hashinfo *hashinfo; - struct hlist_head *udp_hash; + struct udp_table *udp_table; struct raw_hashinfo *raw_hash; } h; @@ -633,36 +724,12 @@ static inline void sk_refcnt_debug_release(const struct sock *sk) #ifdef CONFIG_PROC_FS -# define DEFINE_PROTO_INUSE(NAME) DEFINE_PCOUNTER(NAME) -# define REF_PROTO_INUSE(NAME) PCOUNTER_MEMBER_INITIALIZER(NAME, .inuse) /* Called with local bh disabled */ -static inline void sock_prot_inuse_add(struct proto *prot, int inc) -{ - pcounter_add(&prot->inuse, inc); -} -static inline int sock_prot_inuse_init(struct proto *proto) -{ - return pcounter_alloc(&proto->inuse); -} -static inline int sock_prot_inuse_get(struct proto *proto) -{ - return pcounter_getval(&proto->inuse); -} -static inline void sock_prot_inuse_free(struct proto *proto) -{ - pcounter_free(&proto->inuse); -} +extern void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc); +extern int sock_prot_inuse_get(struct net *net, struct proto *proto); #else -# define DEFINE_PROTO_INUSE(NAME) -# define REF_PROTO_INUSE(NAME) -static void inline sock_prot_inuse_add(struct proto *prot, int inc) -{ -} -static int inline sock_prot_inuse_init(struct proto *proto) -{ - return 0; -} -static void inline sock_prot_inuse_free(struct proto *proto) +static void inline sock_prot_inuse_add(struct net *net, struct proto *prot, + int inc) { } #endif @@ -800,7 +867,6 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) { - skb_truesize_check(skb); sock_set_flag(sk, SOCK_QUEUE_SHRUNK); sk->sk_wmem_queued -= skb->truesize; sk_mem_uncharge(sk, skb->truesize); @@ -831,7 +897,7 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) */ #define sock_lock_init_class_and_name(sk, sname, skey, name, key) \ do { \ - sk->sk_lock.owned = 0; \ + sk->sk_lock.owned = 0; \ init_waitqueue_head(&sk->sk_lock.wq); \ spin_lock_init(&(sk)->sk_lock.slock); \ debug_check_no_locks_freed((void *)&(sk)->sk_lock, \ @@ -885,6 +951,11 @@ extern struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, int noblock, int *errcode); +extern struct sk_buff *sock_alloc_send_pskb(struct sock *sk, + unsigned long header_len, + unsigned long data_len, + int noblock, + int *errcode); extern void *sock_kmalloc(struct sock *sk, int size, gfp_t priority); extern void sock_kfree_s(struct sock *sk, void *mem, int size); @@ -951,43 +1022,7 @@ extern void sk_common_release(struct sock *sk); extern void sock_init_data(struct socket *sock, struct sock *sk); /** - * sk_filter - run a packet through a socket filter - * @sk: sock associated with &sk_buff - * @skb: buffer to filter - * @needlock: set to 1 if the sock is not locked by caller. - * - * Run the filter code and then cut skb->data to correct size returned by - * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller - * than pkt_len we keep whole skb->data. This is the socket level - * wrapper to sk_run_filter. It returns 0 if the packet should - * be accepted or -EPERM if the packet should be tossed. 
- * - */ - -static inline int sk_filter(struct sock *sk, struct sk_buff *skb) -{ - int err; - struct sk_filter *filter; - - err = security_sock_rcv_skb(sk, skb); - if (err) - return err; - - rcu_read_lock_bh(); - filter = rcu_dereference(sk->sk_filter); - if (filter) { - unsigned int pkt_len = sk_run_filter(skb, filter->insns, - filter->len); - err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; - } - rcu_read_unlock_bh(); - - return err; -} - -/** * sk_filter_release: Release a socket filter - * @sk: socket * @fp: filter to remove * * Remove a filter from a socket and release its resources. @@ -1048,6 +1083,11 @@ static inline void sock_put(struct sock *sk) extern int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested); +static inline void sk_set_socket(struct sock *sk, struct socket *sock) +{ + sk->sk_socket = sock; +} + /* Detach socket from process context. * Announce socket dead, detach it from wait queue and inode. * Note that parent inode held reference count on this struct sock, @@ -1059,7 +1099,7 @@ static inline void sock_orphan(struct sock *sk) { write_lock_bh(&sk->sk_callback_lock); sock_set_flag(sk, SOCK_DEAD); - sk->sk_socket = NULL; + sk_set_socket(sk, NULL); sk->sk_sleep = NULL; write_unlock_bh(&sk->sk_callback_lock); } @@ -1069,7 +1109,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) write_lock_bh(&sk->sk_callback_lock); sk->sk_sleep = &parent->wait; parent->sk = sk; - sk->sk_socket = parent; + sk_set_socket(sk, parent); security_sock_graft(sk, parent); write_unlock_bh(&sk->sk_callback_lock); } @@ -1263,7 +1303,7 @@ static inline struct page *sk_stream_alloc_page(struct sock *sk) page = alloc_pages(sk->sk_allocation, 0); if (!page) { - sk->sk_prot->enter_memory_pressure(); + sk->sk_prot->enter_memory_pressure(sk); sk_stream_moderate_sndbuf(sk); } return page; @@ -1279,7 +1319,7 @@ static inline int sock_writeable(const struct sock *sk) static inline gfp_t gfp_any(void) { - return in_atomic() ? GFP_ATOMIC : GFP_KERNEL; + return in_softirq() ? GFP_ATOMIC : GFP_KERNEL; } static inline long sock_rcvtimeo(const struct sock *sk, int noblock) @@ -1312,14 +1352,45 @@ static __inline__ void sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { ktime_t kt = skb->tstamp; + struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb); - if (sock_flag(sk, SOCK_RCVTSTAMP)) + /* + * generate control messages if + * - receive time stamping in software requested (SOCK_RCVTSTAMP + * or SOCK_TIMESTAMPING_RX_SOFTWARE) + * - software time stamp available and wanted + * (SOCK_TIMESTAMPING_SOFTWARE) + * - hardware time stamps available and wanted + * (SOCK_TIMESTAMPING_SYS_HARDWARE or + * SOCK_TIMESTAMPING_RAW_HARDWARE) + */ + if (sock_flag(sk, SOCK_RCVTSTAMP) || + sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE) || + (kt.tv64 && sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) || + (hwtstamps->hwtstamp.tv64 && + sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE)) || + (hwtstamps->syststamp.tv64 && + sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))) __sock_recv_timestamp(msg, sk, skb); else sk->sk_stamp = kt; } /** + * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped + * @msg: outgoing packet + * @sk: socket sending this packet + * @shtx: filled with instructions for time stamping + * + * Currently only depends on SOCK_TIMESTAMPING* flags. Returns error code if + * parameters are invalid. 
+ */
+extern int sock_tx_timestamp(struct msghdr *msg,
+			     struct sock *sk,
+			     union skb_shared_tx *shtx);
+
+
 /**
  * sk_eat_skb - Release a skb if it is no longer needed
  * @sk: socket to eat this skb from
  * @skb: socket buffer to eat
@@ -1345,6 +1416,24 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_e
 }
 #endif
 
+static inline
+struct net *sock_net(const struct sock *sk)
+{
+#ifdef CONFIG_NET_NS
+	return sk->sk_net;
+#else
+	return &init_net;
+#endif
+}
+
+static inline
+void sock_net_set(struct sock *sk, struct net *net)
+{
+#ifdef CONFIG_NET_NS
+	sk->sk_net = net;
+#endif
+}
+
 /*
  * Kernel sockets, e.g. rtnl or icmp_socket, are a part of a namespace.
  * They should not hold a reference to a namespace in order to allow
@@ -1353,11 +1442,23 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_e
  */
 static inline void sk_change_net(struct sock *sk, struct net *net)
 {
-	put_net(sk->sk_net);
-	sk->sk_net = net;
+	put_net(sock_net(sk));
+	sock_net_set(sk, hold_net(net));
 }
 
-extern void sock_enable_timestamp(struct sock *sk);
+static inline struct sock *skb_steal_sock(struct sk_buff *skb)
+{
+	if (unlikely(skb->sk)) {
+		struct sock *sk = skb->sk;
+
+		skb->destructor = NULL;
+		skb->sk = NULL;
+		return sk;
+	}
+	return NULL;
+}
+
+extern void sock_enable_timestamp(struct sock *sk, int flag);
 extern int sock_get_timestamp(struct sock *, struct timeval __user *);
 extern int sock_get_timestampns(struct sock *, struct timespec __user *);
 
@@ -1371,30 +1472,6 @@ extern int net_msg_warn;
 #define LIMIT_NETDEBUG(fmt, args...) \
 	do { if (net_msg_warn && net_ratelimit()) printk(fmt,##args); } while(0)
 
-/*
- * Macros for sleeping on a socket. Use them like this:
- *
- *	 SOCK_SLEEP_PRE(sk)
- *	 if (condition)
- *		 schedule();
- *	 SOCK_SLEEP_POST(sk)
- *
- * N.B. These are now obsolete and were, afaik, only ever used in DECnet
- * and when the last use of them in DECnet has gone, I'm intending to
- * remove them.
- */
-
-#define SOCK_SLEEP_PRE(sk)	{ struct task_struct *tsk = current; \
-				DECLARE_WAITQUEUE(wait, tsk); \
-				tsk->state = TASK_INTERRUPTIBLE; \
-				add_wait_queue((sk)->sk_sleep, &wait); \
-				release_sock(sk);
-
-#define SOCK_SLEEP_POST(sk)	tsk->state = TASK_RUNNING; \
-				remove_wait_queue((sk)->sk_sleep, &wait); \
-				lock_sock(sk); \
-				}
-
 extern __u32 sysctl_wmem_max;
 extern __u32 sysctl_rmem_max;
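
The SOCK_TIMESTAMPING_* flags, the widened sock_recv_timestamp() and the new sock_tx_timestamp() above are the kernel-side half of the SO_TIMESTAMPING socket option introduced by the same patch series. A hedged userspace sketch of enabling it follows; it assumes kernel/libc headers that export SO_TIMESTAMPING and <linux/net_tstamp.h>, and enable_rx_timestamping() is a hypothetical helper name.

	/*
	 * Illustrative userspace sketch: request hardware and software RX
	 * timestamps, reported as software stamps where available.
	 */
	#include <sys/socket.h>
	#include <linux/net_tstamp.h>	/* SOF_TIMESTAMPING_* flags */

	static int enable_rx_timestamping(int fd)
	{
		int val = SOF_TIMESTAMPING_RX_HARDWARE |
			  SOF_TIMESTAMPING_RX_SOFTWARE |
			  SOF_TIMESTAMPING_SOFTWARE;

		return setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
				  &val, sizeof(val));
	}

Timestamps then arrive on recvmsg() in an SCM_TIMESTAMPING control message carrying three struct timespec values (software, system-transformed hardware, raw hardware), matching the SOCK_TIMESTAMPING_SOFTWARE, _SYS_HARDWARE and _RAW_HARDWARE flags tested in sock_recv_timestamp() above.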