X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=include%2Flinux%2Fskbuff.h;h=7cdfb4d52847e6f733df1538f48fc73508e3d13e;hb=7fee226a;hp=487e34507b4198ea1dbccab63b0580b1360c2151;hpb=def8b4faff5ca349beafbbfeb2c51f3602a6ef3a;p=safe%2Fjmp%2Flinux-2.6

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 487e345..7cdfb4d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -15,6 +15,7 @@
 #define _LINUX_SKBUFF_H
 
 #include <linux/kernel.h>
+#include <linux/kmemcheck.h>
 #include <linux/compiler.h>
 #include <linux/time.h>
 #include <linux/cache.h>
@@ -29,9 +30,6 @@
 #include <linux/dmaengine.h>
 #include <linux/hrtimer.h>
 
-#define HAVE_ALLOC_SKB		/* For the drivers to know */
-#define HAVE_ALIGNABLE_SKB	/* Ditto 8) */
-
 /* Don't change this without changing skb_csum_unnecessary! */
 #define CHECKSUM_NONE 0
 #define CHECKSUM_UNNECESSARY 1
@@ -135,25 +133,79 @@ struct skb_frag_struct {
 	__u32 size;
 };
 
+#define HAVE_HW_TIME_STAMP
+
+/**
+ * struct skb_shared_hwtstamps - hardware time stamps
+ * @hwtstamp:	hardware time stamp transformed into duration
+ *		since arbitrary point in time
+ * @syststamp:	hwtstamp transformed to system time base
+ *
+ * Software time stamps generated by ktime_get_real() are stored in
+ * skb->tstamp. The relation between the different kinds of time
+ * stamps is as follows:
+ *
+ * syststamp and tstamp can be compared against each other in
+ * arbitrary combinations. The accuracy of a
+ * syststamp/tstamp/"syststamp from other device" comparison is
+ * limited by the accuracy of the transformation into system time
+ * base. This depends on the device driver and its underlying
+ * hardware.
+ *
+ * hwtstamps can only be compared against other hwtstamps from
+ * the same device.
+ *
+ * This structure is attached to packets as part of the
+ * &skb_shared_info. Use skb_hwtstamps() to get a pointer.
+ */
+struct skb_shared_hwtstamps {
+	ktime_t	hwtstamp;
+	ktime_t	syststamp;
+};
+
+/**
+ * union skb_shared_tx - instructions for time stamping of outgoing packets
+ * @hardware:		generate hardware time stamp
+ * @software:		generate software time stamp
+ * @in_progress:	device driver is going to provide
+ *			hardware time stamp
+ * @flags:		all shared_tx flags
+ *
+ * These flags are attached to packets as part of the
+ * &skb_shared_info. Use skb_tx() to get a pointer.
+ */
+union skb_shared_tx {
+	struct {
+		__u8	hardware:1,
+			software:1,
+			in_progress:1;
+	};
+	__u8 flags;
+};
+
 /* This data is invariant across clones and lives at
  * the end of the header data, ie. at skb->end.
  */
 struct skb_shared_info {
-	atomic_t	dataref;
 	unsigned short	nr_frags;
 	unsigned short	gso_size;
 	/* Warning: this field is not always filled in (UFO)! */
 	unsigned short	gso_segs;
 	unsigned short	gso_type;
 	__be32		ip6_frag_id;
-#ifdef CONFIG_HAS_DMA
-	unsigned int	num_dma_maps;
-#endif
+	union skb_shared_tx tx_flags;
 	struct sk_buff	*frag_list;
+	struct skb_shared_hwtstamps hwtstamps;
+
+	/*
+	 * Warning: all fields before dataref are cleared in __alloc_skb()
+	 */
+	atomic_t	dataref;
+
 	skb_frag_t	frags[MAX_SKB_FRAGS];
-#ifdef CONFIG_HAS_DMA
-	dma_addr_t	dma_maps[MAX_SKB_FRAGS + 1];
-#endif
+	/* Intermediate layers must ensure that destructor_arg
+	 * remains valid until skb destructor */
+	void		*destructor_arg;
 };
 
 /* We divide dataref into two halves.  The higher 16 bits hold references
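
For illustration, a transmit path that honors these new shared flags might look like the sketch below. The mydrv_*() helpers are invented for the example; only skb_tx(), union skb_shared_tx and skb_tstamp_tx() come from this patch.

/* Hedged sketch, not part of the patch: honor a hardware time stamp
 * request at transmit time.  All mydrv_*() names are hypothetical.
 */
static netdev_tx_t mydrv_xmit(struct sk_buff *skb, struct net_device *dev)
{
	union skb_shared_tx *shtx = skb_tx(skb);

	if (shtx->hardware) {
		/* Promise the stack a stamp at TX-completion time. */
		shtx->in_progress = 1;
		mydrv_arm_tx_stamp(dev, skb);
	}
	return mydrv_queue_for_dma(dev, skb);
}
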
@@ -188,6 +240,8 @@ enum {
 	SKB_GSO_TCP_ECN = 1 << 3,
 
 	SKB_GSO_TCPV6 = 1 << 4,
+
+	SKB_GSO_FCOE = 1 << 5,
 };
 
 #if BITS_PER_LONG > 32
@@ -210,7 +264,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@transport_header: Transport layer header
  *	@network_header: Network layer header
  *	@mac_header: Link layer header
- *	@dst: destination entry
+ *	@_skb_refdst: destination entry (with norefcount bit)
  *	@sp: the security path, used for xfrm
  *	@cb: Control buffer. Free for use by every layer. Put private vars here
  *	@len: Length of actual data
@@ -244,12 +298,12 @@ typedef unsigned char *sk_buff_data_t;
  *	@nfctinfo: Relationship of this skb to the connection
  *	@nfct_reasm: netfilter conntrack re-assembly pointer
  *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
- *	@iif: ifindex of device we arrived on
+ *	@skb_iif: ifindex of device we arrived on
+ *	@rxhash: the packet hash computed on receive
  *	@queue_mapping: Queue mapping for multiqueue devices
  *	@tc_index: Traffic control index
  *	@tc_verd: traffic control verdict
  *	@ndisc_nodetype: router type (from link layer)
- *	@do_not_encrypt: set to prevent encryption of this frame
  *	@dma_cookie: a cookie to one of several possible DMA operations
  *		done by skb DMA functions
  *	@secmark: security marking
@@ -261,25 +315,23 @@ struct sk_buff {
 	struct sk_buff		*next;
 	struct sk_buff		*prev;
 
-	struct sock		*sk;
 	ktime_t			tstamp;
+
+	struct sock		*sk;
 	struct net_device	*dev;
 
-	union {
-		struct  dst_entry	*dst;
-		struct	rtable		*rtable;
-	};
-#ifdef CONFIG_XFRM
-	struct	sec_path	*sp;
-#endif
 	/*
 	 * This is the control buffer. It is free to use for every
 	 * layer. Please put your private variables there. If you
 	 * want to keep them across layers you have to do a skb_clone()
 	 * first. This is owned by whoever has the skb queued ATM.
 	 */
-	char			cb[48];
+	char			cb[48] __aligned(8);
 
+	unsigned long		_skb_refdst;
+#ifdef CONFIG_XFRM
+	struct	sec_path	*sp;
+#endif
 	unsigned int		len,
 				data_len;
 	__u16			mac_len,
@@ -292,6 +344,7 @@ struct sk_buff {
 		};
 	};
 	__u32			priority;
+	kmemcheck_bitfield_begin(flags1);
 	__u8			local_df:1,
 				cloned:1,
 				ip_summed:2,
@@ -302,6 +355,7 @@ struct sk_buff {
 				ipvs_property:1,
 				peeked:1,
 				nf_trace:1;
+	kmemcheck_bitfield_end(flags1);
 	__be16			protocol;
 
 	void			(*destructor)(struct sk_buff *skb);
@@ -313,21 +367,24 @@ struct sk_buff {
 	struct nf_bridge_info	*nf_bridge;
 #endif
 
-	int			iif;
-	__u16			queue_mapping;
+	int			skb_iif;
 #ifdef CONFIG_NET_SCHED
 	__u16			tc_index;	/* traffic control index */
 #ifdef CONFIG_NET_CLS_ACT
 	__u16			tc_verd;	/* traffic control verdict */
 #endif
 #endif
+
+	__u32			rxhash;
+
+	kmemcheck_bitfield_begin(flags2);
+	__u16			queue_mapping:16;
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
 	__u8			ndisc_nodetype:2;
 #endif
-#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
-	__u8			do_not_encrypt:1;
-#endif
-	/* 0/13/14 bit hole */
+	kmemcheck_bitfield_end(flags2);
+
+	/* 0/14 bit hole */
 
 #ifdef CONFIG_NET_DMA
 	dma_cookie_t		dma_cookie;
@@ -335,8 +392,10 @@ struct sk_buff {
 #ifdef CONFIG_NETWORK_SECMARK
 	__u32			secmark;
 #endif
-
-	__u32			mark;
+	union {
+		__u32		mark;
+		__u32		dropcount;
+	};
 
 	__u16			vlan_tci;
 
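The control buffer is now explicitly 8-byte aligned, which lets a layer overlay a private structure on cb[] with a stable alignment guarantee. A minimal sketch of that common pattern follows; the structure and function names are hypothetical, while BUILD_BUG_ON() is the usual kernel size guard (real users of this pattern include the TCP control block).

/* Hypothetical per-layer state parked in skb->cb[] while this layer
 * owns the skb; all names here are illustrative only.
 */
struct my_layer_cb {
	u64	enqueue_ns;	/* 8-byte field relies on __aligned(8) */
	u32	flow_id;
};

#define MY_LAYER_CB(skb)	((struct my_layer_cb *)((skb)->cb))

static inline void my_layer_tag(struct sk_buff *skb, u32 flow_id)
{
	/* Build-time check that the private area fits in cb[48]. */
	BUILD_BUG_ON(sizeof(struct my_layer_cb) > sizeof(skb->cb));
	MY_LAYER_CB(skb)->flow_id = flow_id;
}
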
@@ -360,15 +419,73 @@
 
 #include <asm/system.h>
 
-#ifdef CONFIG_HAS_DMA
-#include <linux/dma-mapping.h>
-extern int skb_dma_map(struct device *dev, struct sk_buff *skb,
-		       enum dma_data_direction dir);
-extern void skb_dma_unmap(struct device *dev, struct sk_buff *skb,
-			  enum dma_data_direction dir);
-#endif
+/*
+ * skb might have a dst pointer attached, refcounted or not.
+ * _skb_refdst low order bit is set if refcount was _not_ taken
+ */
+#define SKB_DST_NOREF	1UL
+#define SKB_DST_PTRMASK	~(SKB_DST_NOREF)
+
+/**
+ * skb_dst - returns skb dst_entry
+ * @skb: buffer
+ *
+ * Returns skb dst_entry, regardless of reference taken or not.
+ */
+static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
+{
+	/* If refdst was not refcounted, check we still are in a
+	 * rcu_read_lock section
+	 */
+	WARN_ON((skb->_skb_refdst & SKB_DST_NOREF) &&
+		!rcu_read_lock_held() &&
+		!rcu_read_lock_bh_held());
+	return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK);
+}
+
+/**
+ * skb_dst_set - sets skb dst
+ * @skb: buffer
+ * @dst: dst entry
+ *
+ * Sets skb dst, assuming a reference was taken on dst and should
+ * be released by skb_dst_drop()
+ */
+static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
+{
+	skb->_skb_refdst = (unsigned long)dst;
+}
+
+/**
+ * skb_dst_set_noref - sets skb dst, without a reference
+ * @skb: buffer
+ * @dst: dst entry
+ *
+ * Sets skb dst, assuming a reference was not taken on dst;
+ * skb_dst_drop() should not dst_release() this dst.
+ */
+static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
+{
+	WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+	skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
+}
+
+/**
+ * skb_dst_is_noref - Test if skb dst isn't refcounted
+ * @skb: buffer
+ */
+static inline bool skb_dst_is_noref(const struct sk_buff *skb)
+{
+	return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb);
+}
+
+static inline struct rtable *skb_rtable(const struct sk_buff *skb)
+{
+	return (struct rtable *)skb_dst(skb);
+}
 
 extern void kfree_skb(struct sk_buff *skb);
+extern void consume_skb(struct sk_buff *skb);
 extern void __kfree_skb(struct sk_buff *skb);
 extern struct sk_buff *__alloc_skb(unsigned int size,
 				   gfp_t priority, int fclone, int node);
@@ -407,28 +524,14 @@ extern int	       skb_to_sgvec(struct sk_buff *skb,
 extern int	       skb_cow_data(struct sk_buff *skb, int tailbits,
 				    struct sk_buff **trailer);
 extern int	       skb_pad(struct sk_buff *skb, int pad);
-#define dev_kfree_skb(a)	kfree_skb(a)
-extern void	      skb_over_panic(struct sk_buff *skb, int len,
-				     void *here);
-extern void	      skb_under_panic(struct sk_buff *skb, int len,
-				      void *here);
-extern void	      skb_truesize_bug(struct sk_buff *skb);
-
-static inline void skb_truesize_check(struct sk_buff *skb)
-{
-	int len = sizeof(struct sk_buff) + skb->len;
-
-	if (unlikely((int)skb->truesize < len))
-		skb_truesize_bug(skb);
-}
+#define dev_kfree_skb(a)	consume_skb(a)
 
 extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
 			int getfrag(void *from, char *to, int offset,
 			int len,int odd, struct sk_buff *skb),
 			void *from, int length);
 
-struct skb_seq_state
-{
+struct skb_seq_state {
 	__u32		lower_offset;
 	__u32		upper_offset;
 	__u32		frag_idx;
@@ -464,6 +567,16 @@ static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
 /* Internal */
 #define skb_shinfo(SKB)	((struct skb_shared_info *)(skb_end_pointer(SKB)))
 
+static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb)
+{
+	return &skb_shinfo(skb)->hwtstamps;
+}
+
+static inline union skb_shared_tx *skb_tx(struct sk_buff *skb)
+{
+	return &skb_shinfo(skb)->tx_flags;
+}
+
 /**
  *	skb_queue_empty - check if a queue is empty
  *	@list: queue head
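
The calling convention these dst helpers establish: code already inside an RCU read-side section may attach a route without taking a reference, and skb_dst_drop() will then skip dst_release(). A hedged sketch of such a caller, with invented function names:

/* Illustrative only: attach a dst located under RCU without taking a
 * reference; the skb must be consumed before rcu_read_unlock().
 */
static void my_rx_route(struct sk_buff *skb, struct dst_entry *dst)
{
	rcu_read_lock();
	skb_dst_set_noref(skb, dst);	/* sets the SKB_DST_NOREF bit */
	my_deliver(skb);		/* still inside the RCU section */
	rcu_read_unlock();
}
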
@@ -489,6 +602,19 @@ static inline bool skb_queue_is_last(const struct sk_buff_head *list,
 }
 
 /**
+ * skb_queue_is_first - check if skb is the first entry in the queue
+ * @list: queue head
+ * @skb: buffer
+ *
+ * Returns true if @skb is the first buffer on the list.
+ */
+static inline bool skb_queue_is_first(const struct sk_buff_head *list,
+				      const struct sk_buff *skb)
+{
+	return (skb->prev == (struct sk_buff *) list);
+}
+
+/**
  *	skb_queue_next - return the next packet in the queue
  *	@list: queue head
  *	@skb: current buffer
@@ -507,6 +633,24 @@ static inline struct sk_buff *skb_queue_next(const struct sk_buff_head *list,
 }
 
 /**
+ *	skb_queue_prev - return the prev packet in the queue
+ *	@list: queue head
+ *	@skb: current buffer
+ *
+ *	Return the prev packet in @list before @skb.  It is only valid to
+ *	call this if skb_queue_is_first() evaluates to false.
+ */
+static inline struct sk_buff *skb_queue_prev(const struct sk_buff_head *list,
+					     const struct sk_buff *skb)
+{
+	/* This BUG_ON may seem severe, but if we just return then we
+	 * are going to dereference garbage.
+	 */
+	BUG_ON(skb_queue_is_first(list, skb));
+	return skb->prev;
+}
+
+/**
  *	skb_get - reference buffer
  *	@skb: buffer to reference
  *
@@ -642,7 +786,7 @@ static inline struct sk_buff *skb_unshare(struct sk_buff *skb,
 }
 
 /**
- *	skb_peek
+ *	skb_peek - peek at the head of an &sk_buff_head
  *	@list_: list to peek at
  *
  *	Peek an &sk_buff. Unlike most other operations you _MUST_
@@ -663,7 +807,7 @@ static inline struct sk_buff *skb_peek(struct sk_buff_head *list_)
 }
 
 /**
- *	skb_peek_tail
+ *	skb_peek_tail - peek at the tail of an &sk_buff_head
 *	@list_: list to peek at
 *
 *	Peek an &sk_buff. Unlike most other operations you _MUST_
@@ -973,7 +1117,7 @@ extern void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page,
 			    int off, int size);
 
 #define SKB_PAGE_ASSERT(skb) 	BUG_ON(skb_shinfo(skb)->nr_frags)
-#define SKB_FRAG_ASSERT(skb) 	BUG_ON(skb_shinfo(skb)->frag_list)
+#define SKB_FRAG_ASSERT(skb) 	BUG_ON(skb_has_frags(skb))
 #define SKB_LINEAR_ASSERT(skb)  BUG_ON(skb_is_nonlinear(skb))
 
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
@@ -1039,6 +1183,11 @@ static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len)
 	return skb->data += len;
 }
 
+static inline unsigned char *skb_pull_inline(struct sk_buff *skb, unsigned int len)
+{
+	return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
+}
+
 extern unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta);
 
 static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len)
@@ -1235,12 +1384,12 @@ static inline int skb_network_offset(const struct sk_buff *skb)
  * shifting the start of the packet by 2 bytes. Drivers should do this
  * with:
  *
- * skb_reserve(NET_IP_ALIGN);
+ * skb_reserve(skb, NET_IP_ALIGN);
  *
  * The downside to this alignment of the IP header is that the DMA is now
  * unaligned. On some architectures the cost of an unaligned DMA is high
  * and this cost outweighs the gains made by aligning the IP header.
- * 
+ *
  * Since this trade off varies between architectures, we allow NET_IP_ALIGN
  * to be overridden.
 */
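
Concretely, the allocate-then-reserve sequence described above looks like the sketch below; netdev_alloc_skb_ip_align(), added further down in this patch, wraps exactly this pattern. RX_BUF_LEN and the function name are invented:

/* Sketch of an RX buffer refill honoring NET_IP_ALIGN; RX_BUF_LEN is
 * a hypothetical driver constant.
 */
static struct sk_buff *my_rx_refill(struct net_device *dev)
{
	struct sk_buff *skb = netdev_alloc_skb(dev, RX_BUF_LEN + NET_IP_ALIGN);

	if (skb)
		skb_reserve(skb, NET_IP_ALIGN);	/* IP header lands aligned */
	return skb;
}
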
@@ -1252,7 +1401,7 @@ static inline int skb_network_offset(const struct sk_buff *skb)
 * The networking layer reserves some headroom in skb data (via
 * dev_alloc_skb). This is used to avoid having to reallocate skb data when
 * the header has to grow. In the default case, if the header has to grow
- * 16 bytes or less we avoid the reallocation.
+ * 32 bytes or less we avoid the reallocation.
 *
 * Unfortunately this headroom changes the DMA alignment of the resulting
 * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive
@@ -1260,11 +1409,14 @@ static inline int skb_network_offset(const struct sk_buff *skb)
 * perhaps setting it to a cacheline in size (since that will maintain
 * cacheline alignment of the DMA). It must be a power of 2.
 *
- * Various parts of the networking layer expect at least 16 bytes of
+ * Various parts of the networking layer expect at least 32 bytes of
 * headroom, you should not reduce this.
+ * With RPS, we raised NET_SKB_PAD to 64 so that get_rps_cpus() fetches span
+ * a 64 bytes aligned block to fit modern (>= 64 bytes) cache line sizes
+ * NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8)
 */
 #ifndef NET_SKB_PAD
-#define NET_SKB_PAD	16
+#define NET_SKB_PAD	64
 #endif
 
 extern int ___pskb_trim(struct sk_buff *skb, unsigned int len);
@@ -1386,6 +1538,16 @@ static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev,
 	return __netdev_alloc_skb(dev, length, GFP_ATOMIC);
 }
 
+static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
+		unsigned int length)
+{
+	struct sk_buff *skb = netdev_alloc_skb(dev, length + NET_IP_ALIGN);
+
+	if (NET_IP_ALIGN && skb)
+		skb_reserve(skb, NET_IP_ALIGN);
+	return skb;
+}
+
 extern struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask);
 
 /**
@@ -1612,6 +1774,25 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
 		skb = skb->prev)
 
+static inline bool skb_has_frags(const struct sk_buff *skb)
+{
+	return skb_shinfo(skb)->frag_list != NULL;
+}
+
+static inline void skb_frag_list_init(struct sk_buff *skb)
+{
+	skb_shinfo(skb)->frag_list = NULL;
+}
+
+static inline void skb_frag_add_head(struct sk_buff *skb, struct sk_buff *frag)
+{
+	frag->next = skb_shinfo(skb)->frag_list;
+	skb_shinfo(skb)->frag_list = frag;
+}
+
+#define skb_walk_frags(skb, iter)	\
+	for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next)
+
 extern struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
 					   int *peeked, int *err);
 extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
@@ -1626,9 +1807,17 @@ extern int	       skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
 						 struct iovec *iov);
 extern int	       skb_copy_datagram_from_iovec(struct sk_buff *skb,
 						    int offset,
-						    struct iovec *from,
+						    const struct iovec *from,
+						    int from_offset,
 						    int len);
+extern int	       skb_copy_datagram_const_iovec(const struct sk_buff *from,
+						     int offset,
+						     const struct iovec *to,
+						     int to_offset,
+						     int size);
 extern void	       skb_free_datagram(struct sock *sk, struct sk_buff *skb);
+extern void	       skb_free_datagram_locked(struct sock *sk,
+						struct sk_buff *skb);
 extern int	       skb_kill_datagram(struct sock *sk, struct sk_buff *skb,
 					 unsigned int flags);
 extern __wsum	       skb_checksum(const struct sk_buff *skb, int offset,
@@ -1648,6 +1837,8 @@ extern int	       skb_splice_bits(struct sk_buff *skb,
 extern void	       skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
 extern void	       skb_split(struct sk_buff *skb,
 				 struct sk_buff *skb1, const u32 len);
+extern int	       skb_shift(struct sk_buff *tgt, struct sk_buff *skb,
+				 int shiftlen);
 
 extern struct sk_buff *skb_segment(struct sk_buff *skb, int features);
 
@@ -1696,6 +1887,11 @@ static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb,
 
 extern void skb_init(void);
 
+static inline ktime_t skb_get_ktime(const struct sk_buff *skb)
+{
+	return skb->tstamp;
+}
+
 /**
  *	skb_get_timestamp - get timestamp from a skb
  *	@skb: skb to get stamp from
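
The frag_list helpers added above replace open-coded accesses to skb_shinfo(skb)->frag_list. A small illustrative walk over the fragment list, with an invented function name:

/* Illustrative walk over a fragment list using the new accessors. */
static unsigned int my_frag_list_len(struct sk_buff *skb)
{
	struct sk_buff *iter;
	unsigned int len = 0;

	if (!skb_has_frags(skb))
		return 0;

	skb_walk_frags(skb, iter)
		len += iter->len;
	return len;
}
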
@@ -1705,11 +1901,18 @@ extern void skb_init(void);
  *	This function converts the offset back to a struct timeval and stores
  *	it in stamp.
  */
-static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval *stamp)
+static inline void skb_get_timestamp(const struct sk_buff *skb,
+				     struct timeval *stamp)
 {
 	*stamp = ktime_to_timeval(skb->tstamp);
 }
 
+static inline void skb_get_timestampns(const struct sk_buff *skb,
+				       struct timespec *stamp)
+{
+	*stamp = ktime_to_timespec(skb->tstamp);
+}
+
 static inline void __net_timestamp(struct sk_buff *skb)
 {
 	skb->tstamp = ktime_get_real();
@@ -1725,6 +1928,20 @@ static inline ktime_t net_invalid_timestamp(void)
 	return ktime_set(0, 0);
 }
 
+/**
+ * skb_tstamp_tx - queue clone of skb with send time stamps
+ * @orig_skb:	the original outgoing packet
+ * @hwtstamps:	hardware time stamps, may be NULL if not available
+ *
+ * If the skb has a socket associated, then this function clones the
+ * skb (thus sharing the actual data and optional structures), stores
+ * the optional hardware time stamping information (if non NULL) or
+ * generates a software time stamp (otherwise), then queues the clone
+ * to the error queue of the socket.  Errors are silently ignored.
+ */
+extern void skb_tstamp_tx(struct sk_buff *orig_skb,
+			struct skb_shared_hwtstamps *hwtstamps);
+
 extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
 extern __sum16 __skb_checksum_complete(struct sk_buff *skb);
 
@@ -1855,7 +2072,7 @@ static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping)
 	skb->queue_mapping = queue_mapping;
 }
 
-static inline u16 skb_get_queue_mapping(struct sk_buff *skb)
+static inline u16 skb_get_queue_mapping(const struct sk_buff *skb)
 {
 	return skb->queue_mapping;
 }
@@ -1865,6 +2082,24 @@ static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_buff *from)
 	to->queue_mapping = from->queue_mapping;
 }
 
+static inline void skb_record_rx_queue(struct sk_buff *skb, u16 rx_queue)
+{
+	skb->queue_mapping = rx_queue + 1;
+}
+
+static inline u16 skb_get_rx_queue(const struct sk_buff *skb)
+{
+	return skb->queue_mapping - 1;
+}
+
+static inline bool skb_rx_queue_recorded(const struct sk_buff *skb)
+{
+	return (skb->queue_mapping != 0);
+}
+
+extern u16 skb_tx_hash(const struct net_device *dev,
+		       const struct sk_buff *skb);
+
 #ifdef CONFIG_XFRM
 static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
 {
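
Finally, a sketch of the driver side of skb_tstamp_tx(): on TX completion the driver converts its raw stamp to ktime_t and queues the clone with stamps to the socket's error queue. The handler name is invented and the raw counter is assumed to be in nanoseconds; ns_to_ktime(), skb_tx(), skb_tstamp_tx() and the dev_kfree_skb() remapping to consume_skb() are the pieces this patch relies on.

/* Hedged sketch: complete a TX descriptor that promised a hardware
 * stamp.  mydrv_tx_complete() is hypothetical; raw_stamp is assumed
 * to already be in nanoseconds.
 */
static void mydrv_tx_complete(struct sk_buff *skb, u64 raw_stamp)
{
	union skb_shared_tx *shtx = skb_tx(skb);

	if (shtx->in_progress) {
		struct skb_shared_hwtstamps hwts = {
			.hwtstamp = ns_to_ktime(raw_stamp),
		};
		skb_tstamp_tx(skb, &hwts);
	}
	dev_kfree_skb(skb);	/* now maps to consume_skb() */
}
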