X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fcore%2Fdatagram.c;h=2dccd4ee591b3a755242c05b8889489a380dc8bd;hb=f935aa9e99d6ec74a50871c120e6b21de7256efb;hp=f8d322e1ea9276c3f581fbda2393c829b6fd17f0;hpb=3305b80c214c642b89cd5c21af83bc91ec13f8bd;p=safe%2Fjmp%2Flinux-2.6

diff --git a/net/core/datagram.c b/net/core/datagram.c
index f8d322e..2dccd4e 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -9,7 +9,7 @@
  *	identical recvmsg() code. So we share it here. The poll was
  *	shared before but buried in udp.c so I moved it.
  *
- *	Authors:	Alan Cox . (datagram_poll() from old
+ *	Authors:	Alan Cox . (datagram_poll() from old
  *						udp.c code)
  *
  *	Fixes:
@@ -48,6 +48,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -55,6 +56,7 @@
 #include
 #include
 #include
+#include
 
 /*
  *	Is a socket 'connection oriented' ?
  */
@@ -64,13 +66,25 @@ static inline int connection_based(struct sock *sk)
 	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
 }
 
+static int receiver_wake_function(wait_queue_t *wait, unsigned mode, int sync,
+				  void *key)
+{
+	unsigned long bits = (unsigned long)key;
+
+	/*
+	 * Avoid a wakeup if event not interesting for us
+	 */
+	if (bits && !(bits & (POLLIN | POLLERR)))
+		return 0;
+	return autoremove_wake_function(wait, mode, sync, key);
+}
 /*
  * Wait for a packet..
  */
 static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
 {
 	int error;
-	DEFINE_WAIT(wait);
+	DEFINE_WAIT_FUNC(wait, receiver_wake_function);
 
 	prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
 
@@ -115,10 +129,10 @@ out_noerr:
 }
 
 /**
- *	skb_recv_datagram - Receive a datagram skbuff
+ *	__skb_recv_datagram - Receive a datagram skbuff
  *	@sk: socket
  *	@flags: MSG_ flags
- *	@noblock: blocking operation?
+ *	@peeked: returns non-zero if this packet has been seen before
  *	@err: error code returned
  *
  *	Get a datagram skbuff, understands the peeking, nonblocking wakeups
@@ -143,8 +157,8 @@ out_noerr:
  *	quite explicitly by POSIX 1003.1g, don't change them without having
  *	the standard around please.
  */
-struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
-				  int noblock, int *err)
+struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
+				    int *peeked, int *err)
 {
 	struct sk_buff *skb;
 	long timeo;
@@ -156,7 +170,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
 	if (error)
 		goto no_packet;
 
-	timeo = sock_rcvtimeo(sk, noblock);
+	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 
 	do {
 		/* Again only user level code calls this function, so nothing
@@ -165,18 +179,19 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
 		 * interrupt level will suddenly eat the receive_queue.
 		 *
 		 * Look at current nfs client by the way...
 		 * However, this function was correct in any case. 8)
 		 */
-		if (flags & MSG_PEEK) {
-			unsigned long cpu_flags;
-
-			spin_lock_irqsave(&sk->sk_receive_queue.lock,
-					  cpu_flags);
-			skb = skb_peek(&sk->sk_receive_queue);
-			if (skb)
+		unsigned long cpu_flags;
+
+		spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags);
+		skb = skb_peek(&sk->sk_receive_queue);
+		if (skb) {
+			*peeked = skb->peeked;
+			if (flags & MSG_PEEK) {
+				skb->peeked = 1;
 				atomic_inc(&skb->users);
-			spin_unlock_irqrestore(&sk->sk_receive_queue.lock,
-					       cpu_flags);
-		} else
-			skb = skb_dequeue(&sk->sk_receive_queue);
+			} else
+				__skb_unlink(skb, &sk->sk_receive_queue);
+		}
+		spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags);
 
 		if (skb)
 			return skb;
@@ -194,11 +209,31 @@ no_packet:
 	*err = error;
 	return NULL;
 }
+EXPORT_SYMBOL(__skb_recv_datagram);
+
+struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
+				  int noblock, int *err)
+{
+	int peeked;
+
+	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
+				   &peeked, err);
+}
 
 void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
 {
-	kfree_skb(skb);
+	consume_skb(skb);
+	sk_mem_reclaim_partial(sk);
 }
+EXPORT_SYMBOL(skb_free_datagram);
+
+void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
+{
+	lock_sock(sk);
+	skb_free_datagram(sk, skb);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(skb_free_datagram_locked);
 
 /**
  *	skb_kill_datagram - Free a datagram skbuff forcibly
@@ -217,20 +252,30 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
  *	This function currently only disables BH when acquiring the
  *	sk_receive_queue lock. Therefore it must not be used in a
  *	context where that lock is acquired in an IRQ context.
+ *
+ *	It returns 0 if the packet was removed by us.
  */
-void skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
+int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
 {
+	int err = 0;
+
 	if (flags & MSG_PEEK) {
+		err = -ENOENT;
 		spin_lock_bh(&sk->sk_receive_queue.lock);
 		if (skb == skb_peek(&sk->sk_receive_queue)) {
 			__skb_unlink(skb, &sk->sk_receive_queue);
 			atomic_dec(&skb->users);
+			err = 0;
 		}
 		spin_unlock_bh(&sk->sk_receive_queue.lock);
 	}
 
 	kfree_skb(skb);
+	atomic_inc(&sk->sk_drops);
+	sk_mem_reclaim_partial(sk);
+
+	return err;
 }
 
 EXPORT_SYMBOL(skb_kill_datagram);
 
@@ -247,60 +292,267 @@ EXPORT_SYMBOL(skb_kill_datagram);
 int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
 			    struct iovec *to, int len)
 {
-	int i, err, fraglen, end = 0;
-	struct sk_buff *next = skb_shinfo(skb)->frag_list;
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	struct sk_buff *frag_iter;
+
+	trace_skb_copy_datagram_iovec(skb, len);
+
+	/* Copy header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		if (memcpy_toiovec(to, skb->data + offset, copy))
+			goto fault;
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+	}
+
+	/* Copy paged appendix. Hmm... why does this look so complicated? */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			int err;
+			u8 *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			struct page *page = frag->page;
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap(page);
+			err = memcpy_toiovec(to, vaddr + frag->page_offset +
+					     offset - start, copy);
+			kunmap(page);
+			if (err)
+				goto fault;
+			if (!(len -= copy))
+				return 0;
+			offset += copy;
+		}
+		start = end;
+	}
+
+	skb_walk_frags(skb, frag_iter) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + frag_iter->len;
+		if ((copy = end - offset) > 0) {
+			if (copy > len)
+				copy = len;
+			if (skb_copy_datagram_iovec(frag_iter,
+						    offset - start,
+						    to, copy))
+				goto fault;
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+		}
+		start = end;
+	}
+	if (!len)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+
+/**
+ *	skb_copy_datagram_const_iovec - Copy a datagram to an iovec.
+ *	@skb: buffer to copy
+ *	@offset: offset in the buffer to start copying from
+ *	@to: io vector to copy to
+ *	@to_offset: offset in the io vector to start copying to
+ *	@len: amount of data to copy from buffer to iovec
+ *
+ *	Returns 0 or -EFAULT.
+ *	Note: the iovec is not modified during the copy.
+ */
+int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset,
+				  const struct iovec *to, int to_offset,
+				  int len)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	struct sk_buff *frag_iter;
+
+	/* Copy header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		if (memcpy_toiovecend(to, skb->data + offset, to_offset, copy))
+			goto fault;
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+		to_offset += copy;
+	}
+
+	/* Copy paged appendix. Hmm... why does this look so complicated? */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+		WARN_ON(start > offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			int err;
+			u8 *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			struct page *page = frag->page;
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap(page);
+			err = memcpy_toiovecend(to, vaddr + frag->page_offset +
+						offset - start, to_offset, copy);
+			kunmap(page);
+			if (err)
+				goto fault;
+			if (!(len -= copy))
+				return 0;
+			offset += copy;
+			to_offset += copy;
+		}
+		start = end;
+	}
+
+	skb_walk_frags(skb, frag_iter) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + frag_iter->len;
+		if ((copy = end - offset) > 0) {
+			if (copy > len)
+				copy = len;
+			if (skb_copy_datagram_const_iovec(frag_iter,
+							  offset - start,
+							  to, to_offset,
+							  copy))
+				goto fault;
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+			to_offset += copy;
+		}
+		start = end;
+	}
 	if (!len)
 		return 0;
 
-next_skb:
-	fraglen = skb_headlen(skb);
-	i = -1;
+fault:
+	return -EFAULT;
+}
+EXPORT_SYMBOL(skb_copy_datagram_const_iovec);
 
-	while (1) {
-		int start = end;
+/**
+ *	skb_copy_datagram_from_iovec - Copy a datagram from an iovec.
+ *	@skb: buffer to copy
+ *	@offset: offset in the buffer to start copying to
+ *	@from: io vector to copy from
+ *	@from_offset: offset in the io vector to start copying from
+ *	@len: amount of data to copy to buffer from iovec
+ *
+ *	Returns 0 or -EFAULT.
+ *	Note: the iovec is not modified during the copy.
+ */
+int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
+				 const struct iovec *from, int from_offset,
+				 int len)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	struct sk_buff *frag_iter;
 
-		if ((end += fraglen) > offset) {
-			int copy = end - offset, o = offset - start;
+	/* Copy header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		if (memcpy_fromiovecend(skb->data + offset, from, from_offset,
+					copy))
+			goto fault;
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+		from_offset += copy;
+	}
+
+	/* Copy paged appendix. Hmm... why does this look so complicated? */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			int err;
+			u8 *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			struct page *page = frag->page;
 
 			if (copy > len)
 				copy = len;
-			if (i == -1)
-				err = memcpy_toiovec(to, skb->data + o, copy);
-			else {
-				skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-				struct page *page = frag->page;
-				void *p = kmap(page) + frag->page_offset + o;
-				err = memcpy_toiovec(to, p, copy);
-				kunmap(page);
-			}
+			vaddr = kmap(page);
+			err = memcpy_fromiovecend(vaddr + frag->page_offset +
+						  offset - start,
+						  from, from_offset, copy);
+			kunmap(page);
 			if (err)
 				goto fault;
+
 			if (!(len -= copy))
 				return 0;
 			offset += copy;
+			from_offset += copy;
 		}
-		if (++i >= skb_shinfo(skb)->nr_frags)
-			break;
-		fraglen = skb_shinfo(skb)->frags[i].size;
+		start = end;
 	}
-	if (next) {
-		skb = next;
-		BUG_ON(skb_shinfo(skb)->frag_list);
-		next = skb->next;
-		goto next_skb;
+
+	skb_walk_frags(skb, frag_iter) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + frag_iter->len;
+		if ((copy = end - offset) > 0) {
+			if (copy > len)
+				copy = len;
+			if (skb_copy_datagram_from_iovec(frag_iter,
+							 offset - start,
+							 from,
+							 from_offset,
+							 copy))
+				goto fault;
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+			from_offset += copy;
+		}
+		start = end;
 	}
+	if (!len)
+		return 0;
+
 fault:
 	return -EFAULT;
 }
+EXPORT_SYMBOL(skb_copy_datagram_from_iovec);
 
 static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 				      u8 __user *to, int len,
-				      unsigned int *csump)
+				      __wsum *csump)
 {
 	int start = skb_headlen(skb);
-	int pos = 0;
 	int i, copy = start - offset;
+	struct sk_buff *frag_iter;
+	int pos = 0;
 
 	/* Copy header. */
 	if (copy > 0) {
@@ -321,11 +573,11 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		int end;
 
-		BUG_TRAP(start <= offset + len);
+		WARN_ON(start > offset + len);
 
 		end = start + skb_shinfo(skb)->frags[i].size;
 		if ((copy = end - offset) > 0) {
-			unsigned int csum2;
+			__wsum csum2;
 			int err = 0;
 			u8 *vaddr;
 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
@@ -351,33 +603,29 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 		start = end;
 	}
 
-	if (skb_shinfo(skb)->frag_list) {
-		struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
-		for (; list; list=list->next) {
-			int end;
-
-			BUG_TRAP(start <= offset + len);
-
-			end = start + list->len;
-			if ((copy = end - offset) > 0) {
-				unsigned int csum2 = 0;
-				if (copy > len)
-					copy = len;
-				if (skb_copy_and_csum_datagram(list,
-							       offset - start,
-							       to, copy,
-							       &csum2))
-					goto fault;
-				*csump = csum_block_add(*csump, csum2, pos);
-				if ((len -= copy) == 0)
-					return 0;
-				offset += copy;
-				to += copy;
-				pos += copy;
-			}
-			start = end;
+	skb_walk_frags(skb, frag_iter) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + frag_iter->len;
+		if ((copy = end - offset) > 0) {
+			__wsum csum2 = 0;
+			if (copy > len)
+				copy = len;
+			if (skb_copy_and_csum_datagram(frag_iter,
+						       offset - start,
+						       to, copy,
+						       &csum2))
+				goto fault;
+			*csump = csum_block_add(*csump, csum2, pos);
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+			to += copy;
+			pos += copy;
 		}
+		start = end;
 	}
 	if (!len)
 		return 0;
@@ -386,18 +634,24 @@ fault:
 	return -EFAULT;
 }
 
-unsigned int __skb_checksum_complete(struct sk_buff *skb)
+__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
 {
-	unsigned int sum;
+	__sum16 sum;
 
-	sum = (u16)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
+	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
 	if (likely(!sum)) {
-		if (unlikely(skb->ip_summed == CHECKSUM_HW))
+		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
 			netdev_rx_csum_fault(skb->dev);
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 	return sum;
 }
+EXPORT_SYMBOL(__skb_checksum_complete_head);
+
+__sum16 __skb_checksum_complete(struct sk_buff *skb)
+{
+	return __skb_checksum_complete_head(skb, skb->len);
+}
 EXPORT_SYMBOL(__skb_checksum_complete);
 
 /**
@@ -405,7 +659,7 @@ EXPORT_SYMBOL(__skb_checksum_complete);
  *	@skb: skbuff
  *	@hlen: hardware length
  *	@iov: io vector
- * 
+ *
  *	Caller _must_ check that skb will fit to this iovec.
 *
 *	Returns: 0 - success.
@@ -416,9 +670,12 @@ EXPORT_SYMBOL(__skb_checksum_complete);
 int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
 				     int hlen, struct iovec *iov)
 {
-	unsigned int csum;
+	__wsum csum;
 	int chunk = skb->len - hlen;
 
+	if (!chunk)
+		return 0;
+
 	/* Skip filled elements.
 	 * Pretty silly, look at memcpy_toiovec, though 8)
 	 */
@@ -435,9 +692,9 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
 		if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base,
 					       chunk, &csum))
 			goto fault;
-		if ((unsigned short)csum_fold(csum))
+		if (csum_fold(csum))
 			goto csum_error;
-		if (unlikely(skb->ip_summed == CHECKSUM_HW))
+		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
 			netdev_rx_csum_fault(skb->dev);
 		iov->iov_len -= chunk;
 		iov->iov_base += chunk;
@@ -469,12 +726,14 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
 	struct sock *sk = sock->sk;
 	unsigned int mask;
 
-	poll_wait(file, sk->sk_sleep, wait);
+	sock_poll_wait(file, sk->sk_sleep, wait);
 	mask = 0;
 
 	/* exceptional events? */
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
 		mask |= POLLERR;
+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		mask |= POLLRDHUP;
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
 		mask |= POLLHUP;
 
@@ -504,5 +763,4 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
 EXPORT_SYMBOL(datagram_poll);
 EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
 EXPORT_SYMBOL(skb_copy_datagram_iovec);
-EXPORT_SYMBOL(skb_free_datagram);
 EXPORT_SYMBOL(skb_recv_datagram);
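
For readers coming to this patch from userspace: the peek bookkeeping that __skb_recv_datagram() gains here (skb->peeked plus the *peeked out-argument) is what backs the familiar MSG_PEEK contract. Below is a minimal sketch, not part of the patch, assuming a UDP socket bound to an arbitrary loopback port (9999 is a made-up choice): both recvfrom() calls see the same datagram, and only the second one unlinks it from sk_receive_queue.

/*
 * Peek-then-receive sketch. The first recvfrom(MSG_PEEK) leaves the
 * datagram queued (the kernel side marks skb->peeked); the second
 * recvfrom() dequeues it for real. Blocks until a datagram arrives.
 */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in addr;
	char buf[2048];
	ssize_t n;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	addr.sin_port = htons(9999);	/* hypothetical test port */
	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;

	/* Both calls see the same datagram; only the second dequeues it. */
	n = recvfrom(fd, buf, sizeof(buf), MSG_PEEK, NULL, NULL);
	printf("peeked %zd bytes\n", n);
	n = recvfrom(fd, buf, sizeof(buf), 0, NULL, NULL);
	printf("received %zd bytes\n", n);
	close(fd);
	return 0;
}

Passing MSG_DONTWAIT in flags is the userspace counterpart of the noblock argument that the skb_recv_datagram() wrapper in this patch translates before calling __skb_recv_datagram().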
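The reworked skb_copy_datagram_iovec() walks the skb's linear header, its paged frags, and its frag list while consuming a caller-supplied iovec. A companion sketch of the consumer side, under the same assumptions as above (bound UDP socket, arbitrary buffer sizes): recvmsg() scatters one datagram across two buffers, which for UDP is exactly the path that ends in skb_copy_datagram_iovec().

/*
 * Scatter-read sketch: one datagram split across two buffers. The
 * 8-byte header split is an arbitrary choice for illustration.
 */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in addr;
	char hdr[8], payload[2048];
	struct iovec iov[2];
	struct msghdr msg;
	ssize_t n;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	addr.sin_port = htons(9999);	/* hypothetical test port */
	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;

	/* First 8 bytes land in hdr[], the remainder in payload[]. */
	iov[0].iov_base = hdr;
	iov[0].iov_len = sizeof(hdr);
	iov[1].iov_base = payload;
	iov[1].iov_len = sizeof(payload);
	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = iov;
	msg.msg_iovlen = 2;

	n = recvmsg(fd, &msg, 0);	/* blocks until a datagram arrives */
	printf("received %zd bytes\n", n);
	close(fd);
	return 0;
}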