X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=drivers%2Fnet%2Fvirtio_net.c;h=b0a85d0387962b6ce2394637f9ae6dbbac5f6684;hb=f8965467f366fd18f01feafb5db10512d7b4422c;hp=52198f6797a48a88a76bfee8cc4f6cfe64925ea8;hpb=9cbc1cb8cd46ce1f7645b9de249b2ce8460129bb;p=safe%2Fjmp%2Flinux-2.6 diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 52198f6..b0a85d0 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1,4 +1,4 @@ -/* A simple network driver using virtio. +/* A network driver using virtio. * * Copyright 2007 Rusty Russell IBM Corporation * @@ -25,6 +25,7 @@ #include #include #include +#include static int napi_weight = 128; module_param(napi_weight, int, 0444); @@ -39,69 +40,79 @@ module_param(gso, bool, 0444); #define VIRTNET_SEND_COMMAND_SG_MAX 2 -struct virtnet_info -{ +struct virtnet_info { struct virtio_device *vdev; struct virtqueue *rvq, *svq, *cvq; struct net_device *dev; struct napi_struct napi; unsigned int status; - /* The skb we couldn't send because buffers were full. */ - struct sk_buff *last_xmit_skb; - - /* If we need to free in a timer, this is it. */ - struct timer_list xmit_free_timer; - /* Number of input buffers, and max we've ever had. */ unsigned int num, max; - /* For cleaning up after transmission. */ - struct tasklet_struct tasklet; - bool free_in_tasklet; - /* I like... big packets and I cannot lie! */ bool big_packets; /* Host will merge rx buffers for big packets (shake it! shake it!) */ bool mergeable_rx_bufs; - /* Receive & send queues. */ - struct sk_buff_head recv; - struct sk_buff_head send; + /* Work struct for refilling if we run low on memory. */ + struct delayed_work refill; /* Chain pages by the private ptr. */ struct page *pages; + + /* fragments + linear part + virtio header */ + struct scatterlist rx_sg[MAX_SKB_FRAGS + 2]; + struct scatterlist tx_sg[MAX_SKB_FRAGS + 2]; }; -static inline void *skb_vnet_hdr(struct sk_buff *skb) -{ - return (struct virtio_net_hdr *)skb->cb; -} +struct skb_vnet_hdr { + union { + struct virtio_net_hdr hdr; + struct virtio_net_hdr_mrg_rxbuf mhdr; + }; + unsigned int num_sg; +}; + +struct padded_vnet_hdr { + struct virtio_net_hdr hdr; + /* + * virtio_net_hdr should be in a separated sg buffer because of a + * QEMU bug, and data sg buffer shares same page with this header sg. + * This padding makes next sg 16 byte aligned after virtio_net_hdr. + */ + char padding[6]; +}; -static void give_a_page(struct virtnet_info *vi, struct page *page) +static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb) { - page->private = (unsigned long)vi->pages; - vi->pages = page; + return (struct skb_vnet_hdr *)skb->cb; } -static void trim_pages(struct virtnet_info *vi, struct sk_buff *skb) +/* + * private is used to chain pages for big packets, put the whole + * most recent used list in the beginning for reuse + */ +static void give_pages(struct virtnet_info *vi, struct page *page) { - unsigned int i; + struct page *end; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - give_a_page(vi, skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb)->nr_frags = 0; - skb->data_len = 0; + /* Find end of list, sew whole thing into vi->pages. 
*/ + for (end = page; end->private; end = (struct page *)end->private); + end->private = (unsigned long)vi->pages; + vi->pages = page; } static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask) { struct page *p = vi->pages; - if (p) + if (p) { vi->pages = (struct page *)p->private; - else + /* clear private here, it is used to chain pages */ + p->private = 0; + } else p = alloc_page(gfp_mask); return p; } @@ -115,113 +126,153 @@ static void skb_xmit_done(struct virtqueue *svq) /* We were probably waiting for more output buffers. */ netif_wake_queue(vi->dev); +} - /* Make sure we re-xmit last_xmit_skb: if there are no more packets - * queued, start_xmit won't be called. */ - tasklet_schedule(&vi->tasklet); +static void set_skb_frag(struct sk_buff *skb, struct page *page, + unsigned int offset, unsigned int *len) +{ + int i = skb_shinfo(skb)->nr_frags; + skb_frag_t *f; + + f = &skb_shinfo(skb)->frags[i]; + f->size = min((unsigned)PAGE_SIZE - offset, *len); + f->page_offset = offset; + f->page = page; + + skb->data_len += f->size; + skb->len += f->size; + skb_shinfo(skb)->nr_frags++; + *len -= f->size; } -static void receive_skb(struct net_device *dev, struct sk_buff *skb, - unsigned len) +static struct sk_buff *page_to_skb(struct virtnet_info *vi, + struct page *page, unsigned int len) { - struct virtnet_info *vi = netdev_priv(dev); - struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); - int err; - int i; + struct sk_buff *skb; + struct skb_vnet_hdr *hdr; + unsigned int copy, hdr_len, offset; + char *p; - if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) { - pr_debug("%s: short packet %i\n", dev->name, len); - dev->stats.rx_length_errors++; - goto drop; - } + p = page_address(page); - if (vi->mergeable_rx_bufs) { - struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb); - unsigned int copy; - char *p = page_address(skb_shinfo(skb)->frags[0].page); + /* copy small packet so we can reuse these pages for small data */ + skb = netdev_alloc_skb_ip_align(vi->dev, GOOD_COPY_LEN); + if (unlikely(!skb)) + return NULL; - if (len > PAGE_SIZE) - len = PAGE_SIZE; - len -= sizeof(struct virtio_net_hdr_mrg_rxbuf); + hdr = skb_vnet_hdr(skb); - memcpy(hdr, p, sizeof(*mhdr)); - p += sizeof(*mhdr); + if (vi->mergeable_rx_bufs) { + hdr_len = sizeof hdr->mhdr; + offset = hdr_len; + } else { + hdr_len = sizeof hdr->hdr; + offset = sizeof(struct padded_vnet_hdr); + } - copy = len; - if (copy > skb_tailroom(skb)) - copy = skb_tailroom(skb); + memcpy(hdr, p, hdr_len); - memcpy(skb_put(skb, copy), p, copy); + len -= hdr_len; + p += offset; - len -= copy; + copy = len; + if (copy > skb_tailroom(skb)) + copy = skb_tailroom(skb); + memcpy(skb_put(skb, copy), p, copy); - if (!len) { - give_a_page(vi, skb_shinfo(skb)->frags[0].page); - skb_shinfo(skb)->nr_frags--; - } else { - skb_shinfo(skb)->frags[0].page_offset += - sizeof(*mhdr) + copy; - skb_shinfo(skb)->frags[0].size = len; - skb->data_len += len; - skb->len += len; - } + len -= copy; + offset += copy; - while (--mhdr->num_buffers) { - struct sk_buff *nskb; + while (len) { + set_skb_frag(skb, page, offset, &len); + page = (struct page *)page->private; + offset = 0; + } - i = skb_shinfo(skb)->nr_frags; - if (i >= MAX_SKB_FRAGS) { - pr_debug("%s: packet too long %d\n", dev->name, - len); - dev->stats.rx_length_errors++; - goto drop; - } + if (page) + give_pages(vi, page); - nskb = vi->rvq->vq_ops->get_buf(vi->rvq, &len); - if (!nskb) { - pr_debug("%s: rx error: %d buffers missing\n", - dev->name, mhdr->num_buffers); - 
dev->stats.rx_length_errors++; - goto drop; - } + return skb; +} - __skb_unlink(nskb, &vi->recv); - vi->num--; +static int receive_mergeable(struct virtnet_info *vi, struct sk_buff *skb) +{ + struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb); + struct page *page; + int num_buf, i, len; + + num_buf = hdr->mhdr.num_buffers; + while (--num_buf) { + i = skb_shinfo(skb)->nr_frags; + if (i >= MAX_SKB_FRAGS) { + pr_debug("%s: packet too long\n", skb->dev->name); + skb->dev->stats.rx_length_errors++; + return -EINVAL; + } - skb_shinfo(skb)->frags[i] = skb_shinfo(nskb)->frags[0]; - skb_shinfo(nskb)->nr_frags = 0; - kfree_skb(nskb); + page = vi->rvq->vq_ops->get_buf(vi->rvq, &len); + if (!page) { + pr_debug("%s: rx error: %d buffers missing\n", + skb->dev->name, hdr->mhdr.num_buffers); + skb->dev->stats.rx_length_errors++; + return -EINVAL; + } + if (len > PAGE_SIZE) + len = PAGE_SIZE; - if (len > PAGE_SIZE) - len = PAGE_SIZE; + set_skb_frag(skb, page, 0, &len); - skb_shinfo(skb)->frags[i].size = len; - skb_shinfo(skb)->nr_frags++; - skb->data_len += len; - skb->len += len; - } - } else { - len -= sizeof(struct virtio_net_hdr); + --vi->num; + } + return 0; +} - if (len <= MAX_PACKET_LEN) - trim_pages(vi, skb); +static void receive_buf(struct net_device *dev, void *buf, unsigned int len) +{ + struct virtnet_info *vi = netdev_priv(dev); + struct sk_buff *skb; + struct page *page; + struct skb_vnet_hdr *hdr; - err = pskb_trim(skb, len); - if (err) { - pr_debug("%s: pskb_trim failed %i %d\n", dev->name, - len, err); + if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) { + pr_debug("%s: short packet %i\n", dev->name, len); + dev->stats.rx_length_errors++; + if (vi->mergeable_rx_bufs || vi->big_packets) + give_pages(vi, buf); + else + dev_kfree_skb(buf); + return; + } + + if (!vi->mergeable_rx_bufs && !vi->big_packets) { + skb = buf; + len -= sizeof(struct virtio_net_hdr); + skb_trim(skb, len); + } else { + page = buf; + skb = page_to_skb(vi, page, len); + if (unlikely(!skb)) { dev->stats.rx_dropped++; - goto drop; + give_pages(vi, page); + return; } + if (vi->mergeable_rx_bufs) + if (receive_mergeable(vi, skb)) { + dev_kfree_skb(skb); + return; + } } + hdr = skb_vnet_hdr(skb); skb->truesize += skb->data_len; dev->stats.rx_bytes += skb->len; dev->stats.rx_packets++; - if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { pr_debug("Needs csum!\n"); - if (!skb_partial_csum_set(skb,hdr->csum_start,hdr->csum_offset)) + if (!skb_partial_csum_set(skb, + hdr->hdr.csum_start, + hdr->hdr.csum_offset)) goto frame_err; } @@ -229,9 +280,9 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, pr_debug("Receiving skb proto 0x%04x len %i type %i\n", ntohs(skb->protocol), skb->len, skb->pkt_type); - if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { + if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) { pr_debug("GSO!\n"); - switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; break; @@ -244,14 +295,14 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, default: if (net_ratelimit()) printk(KERN_WARNING "%s: bad gso type %u.\n", - dev->name, hdr->gso_type); + dev->name, hdr->hdr.gso_type); goto frame_err; } - if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) + if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN) skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; - skb_shinfo(skb)->gso_size = hdr->gso_size; + 
skb_shinfo(skb)->gso_size = hdr->hdr.gso_size; if (skb_shinfo(skb)->gso_size == 0) { if (net_ratelimit()) printk(KERN_WARNING "%s: zero gso size.\n", @@ -269,110 +320,121 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, frame_err: dev->stats.rx_frame_errors++; -drop: dev_kfree_skb(skb); } -static void try_fill_recv_maxbufs(struct virtnet_info *vi) +static int add_recvbuf_small(struct virtnet_info *vi, gfp_t gfp) { struct sk_buff *skb; - struct scatterlist sg[2+MAX_SKB_FRAGS]; - int num, err, i; - - sg_init_table(sg, 2+MAX_SKB_FRAGS); - for (;;) { - struct virtio_net_hdr *hdr; + struct skb_vnet_hdr *hdr; + int err; - skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN + NET_IP_ALIGN); - if (unlikely(!skb)) - break; + skb = netdev_alloc_skb_ip_align(vi->dev, MAX_PACKET_LEN); + if (unlikely(!skb)) + return -ENOMEM; - skb_reserve(skb, NET_IP_ALIGN); - skb_put(skb, MAX_PACKET_LEN); + skb_put(skb, MAX_PACKET_LEN); - hdr = skb_vnet_hdr(skb); - sg_set_buf(sg, hdr, sizeof(*hdr)); + hdr = skb_vnet_hdr(skb); + sg_set_buf(vi->rx_sg, &hdr->hdr, sizeof hdr->hdr); - if (vi->big_packets) { - for (i = 0; i < MAX_SKB_FRAGS; i++) { - skb_frag_t *f = &skb_shinfo(skb)->frags[i]; - f->page = get_a_page(vi, GFP_ATOMIC); - if (!f->page) - break; + skb_to_sgvec(skb, vi->rx_sg + 1, 0, skb->len); - f->page_offset = 0; - f->size = PAGE_SIZE; + err = vi->rvq->vq_ops->add_buf(vi->rvq, vi->rx_sg, 0, 2, skb); + if (err < 0) + dev_kfree_skb(skb); - skb->data_len += PAGE_SIZE; - skb->len += PAGE_SIZE; + return err; +} - skb_shinfo(skb)->nr_frags++; - } +static int add_recvbuf_big(struct virtnet_info *vi, gfp_t gfp) +{ + struct page *first, *list = NULL; + char *p; + int i, err, offset; + + /* page in vi->rx_sg[MAX_SKB_FRAGS + 1] is list tail */ + for (i = MAX_SKB_FRAGS + 1; i > 1; --i) { + first = get_a_page(vi, gfp); + if (!first) { + if (list) + give_pages(vi, list); + return -ENOMEM; } + sg_set_buf(&vi->rx_sg[i], page_address(first), PAGE_SIZE); - num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; - skb_queue_head(&vi->recv, skb); + /* chain new page in list head to match sg */ + first->private = (unsigned long)list; + list = first; + } - err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, num, skb); - if (err) { - skb_unlink(skb, &vi->recv); - trim_pages(vi, skb); - kfree_skb(skb); - break; - } - vi->num++; + first = get_a_page(vi, gfp); + if (!first) { + give_pages(vi, list); + return -ENOMEM; } - if (unlikely(vi->num > vi->max)) - vi->max = vi->num; - vi->rvq->vq_ops->kick(vi->rvq); + p = page_address(first); + + /* vi->rx_sg[0], vi->rx_sg[1] share the same page */ + /* a separated vi->rx_sg[0] for virtio_net_hdr only due to QEMU bug */ + sg_set_buf(&vi->rx_sg[0], p, sizeof(struct virtio_net_hdr)); + + /* vi->rx_sg[1] for data packet, from offset */ + offset = sizeof(struct padded_vnet_hdr); + sg_set_buf(&vi->rx_sg[1], p + offset, PAGE_SIZE - offset); + + /* chain first in list head */ + first->private = (unsigned long)list; + err = vi->rvq->vq_ops->add_buf(vi->rvq, vi->rx_sg, 0, MAX_SKB_FRAGS + 2, + first); + if (err < 0) + give_pages(vi, first); + + return err; } -static void try_fill_recv(struct virtnet_info *vi) +static int add_recvbuf_mergeable(struct virtnet_info *vi, gfp_t gfp) { - struct sk_buff *skb; - struct scatterlist sg[1]; + struct page *page; int err; - if (!vi->mergeable_rx_bufs) { - try_fill_recv_maxbufs(vi); - return; - } - - for (;;) { - skb_frag_t *f; - - skb = netdev_alloc_skb(vi->dev, GOOD_COPY_LEN + NET_IP_ALIGN); - if (unlikely(!skb)) - break; + page = get_a_page(vi, gfp); + if 
(!page) + return -ENOMEM; - skb_reserve(skb, NET_IP_ALIGN); + sg_init_one(vi->rx_sg, page_address(page), PAGE_SIZE); - f = &skb_shinfo(skb)->frags[0]; - f->page = get_a_page(vi, GFP_ATOMIC); - if (!f->page) { - kfree_skb(skb); - break; - } + err = vi->rvq->vq_ops->add_buf(vi->rvq, vi->rx_sg, 0, 1, page); + if (err < 0) + give_pages(vi, page); - f->page_offset = 0; - f->size = PAGE_SIZE; + return err; +} - skb_shinfo(skb)->nr_frags++; +/* Returns false if we couldn't fill entirely (OOM). */ +static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp) +{ + int err; + bool oom = false; - sg_init_one(sg, page_address(f->page), PAGE_SIZE); - skb_queue_head(&vi->recv, skb); + do { + if (vi->mergeable_rx_bufs) + err = add_recvbuf_mergeable(vi, gfp); + else if (vi->big_packets) + err = add_recvbuf_big(vi, gfp); + else + err = add_recvbuf_small(vi, gfp); - err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, 1, skb); - if (err) { - skb_unlink(skb, &vi->recv); - kfree_skb(skb); + if (err < 0) { + oom = true; break; } - vi->num++; - } + ++vi->num; + } while (err > 0); if (unlikely(vi->num > vi->max)) vi->max = vi->num; vi->rvq->vq_ops->kick(vi->rvq); + return !oom; } static void skb_recv_done(struct virtqueue *rvq) @@ -385,31 +447,46 @@ static void skb_recv_done(struct virtqueue *rvq) } } +static void refill_work(struct work_struct *work) +{ + struct virtnet_info *vi; + bool still_empty; + + vi = container_of(work, struct virtnet_info, refill.work); + napi_disable(&vi->napi); + still_empty = !try_fill_recv(vi, GFP_KERNEL); + napi_enable(&vi->napi); + + /* In theory, this can happen: if we don't get any buffers in + * we will *never* try to fill again. */ + if (still_empty) + schedule_delayed_work(&vi->refill, HZ/2); +} + static int virtnet_poll(struct napi_struct *napi, int budget) { struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi); - struct sk_buff *skb = NULL; + void *buf; unsigned int len, received = 0; again: while (received < budget && - (skb = vi->rvq->vq_ops->get_buf(vi->rvq, &len)) != NULL) { - __skb_unlink(skb, &vi->recv); - receive_skb(vi->dev, skb, len); - vi->num--; + (buf = vi->rvq->vq_ops->get_buf(vi->rvq, &len)) != NULL) { + receive_buf(vi->dev, buf, len); + --vi->num; received++; } - /* FIXME: If we oom and completely run out of inbufs, we need - * to start a timer trying to fill more. */ - if (vi->num < vi->max / 2) - try_fill_recv(vi); + if (vi->num < vi->max / 2) { + if (!try_fill_recv(vi, GFP_ATOMIC)) + schedule_delayed_work(&vi->refill, 0); + } /* Out of packets? */ if (received < budget) { napi_complete(napi); - if (unlikely(!vi->rvq->vq_ops->enable_cb(vi->rvq)) - && napi_schedule_prep(napi)) { + if (unlikely(!vi->rvq->vq_ops->enable_cb(vi->rvq)) && + napi_schedule_prep(napi)) { vi->rvq->vq_ops->disable_cb(vi->rvq); __napi_schedule(napi); goto again; @@ -419,151 +496,112 @@ again: return received; } -static void free_old_xmit_skbs(struct virtnet_info *vi) +static unsigned int free_old_xmit_skbs(struct virtnet_info *vi) { struct sk_buff *skb; - unsigned int len; + unsigned int len, tot_sgs = 0; while ((skb = vi->svq->vq_ops->get_buf(vi->svq, &len)) != NULL) { pr_debug("Sent skb %p\n", skb); - __skb_unlink(skb, &vi->send); vi->dev->stats.tx_bytes += skb->len; vi->dev->stats.tx_packets++; - kfree_skb(skb); + tot_sgs += skb_vnet_hdr(skb)->num_sg; + dev_kfree_skb_any(skb); } -} - -/* If the virtio transport doesn't always notify us when all in-flight packets - * are consumed, we fall back to using this function on a timer to free them. 
*/ -static void xmit_free(unsigned long data) -{ - struct virtnet_info *vi = (void *)data; - - netif_tx_lock(vi->dev); - - free_old_xmit_skbs(vi); - - if (!skb_queue_empty(&vi->send)) - mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10)); - - netif_tx_unlock(vi->dev); + return tot_sgs; } static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) { - int num, err; - struct scatterlist sg[2+MAX_SKB_FRAGS]; - struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb); - struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); + struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb); const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; - sg_init_table(sg, 2+MAX_SKB_FRAGS); - pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); if (skb->ip_summed == CHECKSUM_PARTIAL) { - hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - hdr->csum_start = skb->csum_start - skb_headroom(skb); - hdr->csum_offset = skb->csum_offset; + hdr->hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + hdr->hdr.csum_start = skb->csum_start - skb_headroom(skb); + hdr->hdr.csum_offset = skb->csum_offset; } else { - hdr->flags = 0; - hdr->csum_offset = hdr->csum_start = 0; + hdr->hdr.flags = 0; + hdr->hdr.csum_offset = hdr->hdr.csum_start = 0; } if (skb_is_gso(skb)) { - hdr->hdr_len = skb_headlen(skb); - hdr->gso_size = skb_shinfo(skb)->gso_size; + hdr->hdr.hdr_len = skb_headlen(skb); + hdr->hdr.gso_size = skb_shinfo(skb)->gso_size; if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) - hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; + hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) - hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) - hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; + hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP; else BUG(); if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN) - hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; + hdr->hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN; } else { - hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE; - hdr->gso_size = hdr->hdr_len = 0; + hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE; + hdr->hdr.gso_size = hdr->hdr.hdr_len = 0; } - mhdr->num_buffers = 0; + hdr->mhdr.num_buffers = 0; /* Encode metadata header at front. */ if (vi->mergeable_rx_bufs) - sg_set_buf(sg, mhdr, sizeof(*mhdr)); + sg_set_buf(vi->tx_sg, &hdr->mhdr, sizeof hdr->mhdr); else - sg_set_buf(sg, hdr, sizeof(*hdr)); + sg_set_buf(vi->tx_sg, &hdr->hdr, sizeof hdr->hdr); - num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; - - err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb); - if (!err && !vi->free_in_tasklet) - mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10)); - - return err; -} - -static void xmit_tasklet(unsigned long data) -{ - struct virtnet_info *vi = (void *)data; - - netif_tx_lock_bh(vi->dev); - if (vi->last_xmit_skb && xmit_skb(vi, vi->last_xmit_skb) == 0) { - vi->svq->vq_ops->kick(vi->svq); - vi->last_xmit_skb = NULL; - } - if (vi->free_in_tasklet) - free_old_xmit_skbs(vi); - netif_tx_unlock_bh(vi->dev); + hdr->num_sg = skb_to_sgvec(skb, vi->tx_sg + 1, 0, skb->len) + 1; + return vi->svq->vq_ops->add_buf(vi->svq, vi->tx_sg, hdr->num_sg, + 0, skb); } -static int start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); + int capacity; again: /* Free up any pending old buffers before queueing new ones. */ free_old_xmit_skbs(vi); - /* If we has a buffer left over from last time, send it now. 
*/ - if (unlikely(vi->last_xmit_skb) && - xmit_skb(vi, vi->last_xmit_skb) != 0) - goto stop_queue; + /* Try to transmit */ + capacity = xmit_skb(vi, skb); - vi->last_xmit_skb = NULL; - - /* Put new one in send queue and do transmit */ - if (likely(skb)) { - __skb_queue_head(&vi->send, skb); - if (xmit_skb(vi, skb) != 0) { - vi->last_xmit_skb = skb; - skb = NULL; - goto stop_queue; + /* This can happen with OOM and indirect buffers. */ + if (unlikely(capacity < 0)) { + netif_stop_queue(dev); + dev_warn(&dev->dev, "Unexpected full queue\n"); + if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) { + vi->svq->vq_ops->disable_cb(vi->svq); + netif_start_queue(dev); + goto again; } + return NETDEV_TX_BUSY; } -done: vi->svq->vq_ops->kick(vi->svq); - return NETDEV_TX_OK; - -stop_queue: - pr_debug("%s: virtio not prepared to send\n", dev->name); - netif_stop_queue(dev); - /* Activate callback for using skbs: if this returns false it - * means some were used in the meantime. */ - if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) { - vi->svq->vq_ops->disable_cb(vi->svq); - netif_start_queue(dev); - goto again; - } - if (skb) { - /* Drop this skb: we only queue one. */ - vi->dev->stats.tx_dropped++; - kfree_skb(skb); + /* Don't wait up for transmitted skbs to be freed. */ + skb_orphan(skb); + nf_reset(skb); + + /* Apparently nice girls don't return TX_BUSY; stop the queue + * before it gets out of hand. Naturally, this wastes entries. */ + if (capacity < 2+MAX_SKB_FRAGS) { + netif_stop_queue(dev); + if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) { + /* More just got used, free them then recheck. */ + capacity += free_old_xmit_skbs(vi); + if (capacity >= 2+MAX_SKB_FRAGS) { + netif_start_queue(dev); + vi->svq->vq_ops->disable_cb(vi->svq); + } + } } - goto done; + + return NETDEV_TX_OK; } static int virtnet_set_mac_address(struct net_device *dev, void *p) @@ -640,7 +678,7 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, sg_set_buf(&sg[i + 1], sg_virt(s), s->length); sg_set_buf(&sg[out + in - 1], &status, sizeof(status)); - BUG_ON(vi->cvq->vq_ops->add_buf(vi->cvq, sg, out, in, vi)); + BUG_ON(vi->cvq->vq_ops->add_buf(vi->cvq, sg, out, in, vi) < 0); vi->cvq->vq_ops->kick(vi->cvq); @@ -680,8 +718,9 @@ static void virtnet_set_rx_mode(struct net_device *dev) struct scatterlist sg[2]; u8 promisc, allmulti; struct virtio_net_ctrl_mac *mac_data; - struct dev_addr_list *addr; struct netdev_hw_addr *ha; + int uc_count; + int mc_count; void *buf; int i; @@ -708,9 +747,12 @@ static void virtnet_set_rx_mode(struct net_device *dev) dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", allmulti ? 
"en" : "dis"); + uc_count = netdev_uc_count(dev); + mc_count = netdev_mc_count(dev); /* MAC filter - use one buffer for both lists */ - mac_data = buf = kzalloc(((dev->uc_count + dev->mc_count) * ETH_ALEN) + - (2 * sizeof(mac_data->entries)), GFP_ATOMIC); + buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) + + (2 * sizeof(mac_data->entries)), GFP_ATOMIC); + mac_data = buf; if (!buf) { dev_warn(&dev->dev, "No memory for MAC address buffer\n"); return; @@ -719,24 +761,24 @@ static void virtnet_set_rx_mode(struct net_device *dev) sg_init_table(sg, 2); /* Store the unicast list and count in the front of the buffer */ - mac_data->entries = dev->uc_count; + mac_data->entries = uc_count; i = 0; - list_for_each_entry(ha, &dev->uc_list, list) + netdev_for_each_uc_addr(ha, dev) memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); sg_set_buf(&sg[0], mac_data, - sizeof(mac_data->entries) + (dev->uc_count * ETH_ALEN)); + sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); /* multicast list and count fill the end */ - mac_data = (void *)&mac_data->macs[dev->uc_count][0]; + mac_data = (void *)&mac_data->macs[uc_count][0]; - mac_data->entries = dev->mc_count; - addr = dev->mc_list; - for (i = 0; i < dev->mc_count; i++, addr = addr->next) - memcpy(&mac_data->macs[i][0], addr->da_addr, ETH_ALEN); + mac_data->entries = mc_count; + i = 0; + netdev_for_each_mc_addr(ha, dev) + memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); sg_set_buf(&sg[1], mac_data, - sizeof(mac_data->entries) + (dev->mc_count * ETH_ALEN)); + sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET, @@ -770,10 +812,11 @@ static void virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid) dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid); } -static struct ethtool_ops virtnet_ethtool_ops = { +static const struct ethtool_ops virtnet_ethtool_ops = { .set_tx_csum = virtnet_set_tx_csum, .set_sg = ethtool_op_set_sg, .set_tso = ethtool_op_set_tso, + .set_ufo = ethtool_op_set_ufo, .get_link = ethtool_op_get_link, }; @@ -893,15 +936,14 @@ static int virtnet_probe(struct virtio_device *vdev) vi->vdev = vdev; vdev->priv = vi; vi->pages = NULL; - - /* If they give us a callback when all buffers are done, we don't need - * the timer. */ - vi->free_in_tasklet = virtio_has_feature(vdev,VIRTIO_F_NOTIFY_ON_EMPTY); + INIT_DELAYED_WORK(&vi->refill, refill_work); + sg_init_table(vi->rx_sg, ARRAY_SIZE(vi->rx_sg)); + sg_init_table(vi->tx_sg, ARRAY_SIZE(vi->tx_sg)); /* If we can receive ANY GSO packets, we must allocate large ones. */ - if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) - || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) - || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN)) + if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || + virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) || + virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN)) vi->big_packets = true; if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) @@ -925,15 +967,6 @@ static int virtnet_probe(struct virtio_device *vdev) dev->features |= NETIF_F_HW_VLAN_FILTER; } - /* Initialize our empty receive and send queues. 
*/ - skb_queue_head_init(&vi->recv); - skb_queue_head_init(&vi->send); - - tasklet_init(&vi->tasklet, xmit_tasklet, (unsigned long)vi); - - if (!vi->free_in_tasklet) - setup_timer(&vi->xmit_free_timer, xmit_free, (unsigned long)vi); - err = register_netdev(dev); if (err) { pr_debug("virtio_net: registering device failed\n"); @@ -941,7 +974,7 @@ static int virtnet_probe(struct virtio_device *vdev) } /* Last of all, set up some receive buffers. */ - try_fill_recv(vi); + try_fill_recv(vi, GFP_KERNEL); /* If we didn't even get one input buffer, we're useless. */ if (vi->num == 0) { @@ -958,6 +991,7 @@ static int virtnet_probe(struct virtio_device *vdev) unregister: unregister_netdev(dev); + cancel_delayed_work_sync(&vi->refill); free_vqs: vdev->config->del_vqs(vdev); free: @@ -965,27 +999,41 @@ free: return err; } -static void virtnet_remove(struct virtio_device *vdev) +static void free_unused_bufs(struct virtnet_info *vi) +{ + void *buf; + while (1) { + buf = vi->svq->vq_ops->detach_unused_buf(vi->svq); + if (!buf) + break; + dev_kfree_skb(buf); + } + while (1) { + buf = vi->rvq->vq_ops->detach_unused_buf(vi->rvq); + if (!buf) + break; + if (vi->mergeable_rx_bufs || vi->big_packets) + give_pages(vi, buf); + else + dev_kfree_skb(buf); + --vi->num; + } + BUG_ON(vi->num != 0); +} + +static void __devexit virtnet_remove(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; - struct sk_buff *skb; /* Stop all the virtqueues. */ vdev->config->reset(vdev); - if (!vi->free_in_tasklet) - del_timer_sync(&vi->xmit_free_timer); - - /* Free our skbs in send and recv queues, if any. */ - while ((skb = __skb_dequeue(&vi->recv)) != NULL) { - kfree_skb(skb); - vi->num--; - } - __skb_queue_purge(&vi->send); - - BUG_ON(vi->num != 0); unregister_netdev(vi->dev); + cancel_delayed_work_sync(&vi->refill); + + /* Free unused buffers in both send and recv, if any. */ + free_unused_bufs(vi); vdev->config->del_vqs(vi->vdev); @@ -1005,13 +1053,12 @@ static unsigned int features[] = { VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, - VIRTIO_NET_F_GUEST_ECN, /* We don't yet handle UFO input. */ + VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, - VIRTIO_F_NOTIFY_ON_EMPTY, }; -static struct virtio_driver virtio_net = { +static struct virtio_driver virtio_net_driver = { .feature_table = features, .feature_table_size = ARRAY_SIZE(features), .driver.name = KBUILD_MODNAME, @@ -1024,12 +1071,12 @@ static struct virtio_driver virtio_net = { static int __init init(void) { - return register_virtio_driver(&virtio_net); + return register_virtio_driver(&virtio_net_driver); } static void __exit fini(void) { - unregister_virtio_driver(&virtio_net); + unregister_virtio_driver(&virtio_net_driver); } module_init(init); module_exit(fini);
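
The give_pages()/get_a_page() pair introduced by this patch keeps spare receive pages on a singly linked free list threaded through page->private: a returned chain is spliced whole onto the head of vi->pages, and allocation takes from the head before falling back to alloc_page(). The stand-alone user-space sketch below mimics that scheme with a hypothetical struct fake_page and a global pool; it is an illustration of the idea only, not code from the driver.

#include <stdio.h>
#include <stdlib.h>

/*
 * Illustration of the free-list idea behind give_pages()/get_a_page():
 * spare pages are chained through a single pointer-sized field
 * ("private") and reused from the head of the list, with a fresh
 * allocation only when the list is empty.  The names here are
 * hypothetical; the real driver chains struct page via page->private.
 */
struct fake_page {
	unsigned long private;	/* next page in the chain, or 0 */
	char data[4096];
};

static struct fake_page *pool;	/* plays the role of vi->pages */

/* Splice a whole chain (possibly several pages) onto the pool head. */
static void give_pages(struct fake_page *page)
{
	struct fake_page *end;

	for (end = page; end->private; end = (struct fake_page *)end->private)
		;
	end->private = (unsigned long)pool;
	pool = page;
}

/* Take one page from the pool, or allocate a new one if it is empty. */
static struct fake_page *get_a_page(void)
{
	struct fake_page *p = pool;

	if (p) {
		pool = (struct fake_page *)p->private;
		p->private = 0;	/* clear the link before reuse */
	} else {
		p = calloc(1, sizeof(*p));
	}
	return p;
}

int main(void)
{
	struct fake_page *a = get_a_page();	/* allocated fresh */
	struct fake_page *b = get_a_page();	/* allocated fresh */

	b->private = (unsigned long)a;		/* chain b -> a */
	give_pages(b);				/* return both to the pool */

	printf("reused b: %s\n", get_a_page() == b ? "yes" : "no");
	printf("reused a: %s\n", get_a_page() == a ? "yes" : "no");

	free(a);
	free(b);
	return 0;
}

Threading the list through a field the page already carries means no extra allocation is needed for list nodes, which is why get_a_page() clears private before handing a page out and why give_pages() must walk to the end of an incoming chain before splicing it onto the existing pool.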