X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=drivers%2Fdma%2Fioat%2Fdma_v2.c;h=5f7a500e18d02636f1b94650202f6b2614323bb7;hb=a87da40875f5510866a699b72818cce09b0b9040;hp=2f34f290041ef81a1f441d4d5c5151cc23edd181;hpb=f6ab95b55735fa03cad8d0f966647e5df206e207;p=safe%2Fjmp%2Flinux-2.6 diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 2f34f29..5f7a500 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -39,12 +39,17 @@ #include "registers.h" #include "hw.h" -static int ioat_ring_alloc_order = 8; +int ioat_ring_alloc_order = 8; module_param(ioat_ring_alloc_order, int, 0644); MODULE_PARM_DESC(ioat_ring_alloc_order, - "ioat2+: allocate 2^n descriptors per channel (default: n=8)"); - -static void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) + "ioat2+: allocate 2^n descriptors per channel" + " (default: 8 max: 16)"); +static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER; +module_param(ioat_ring_max_alloc_order, int, 0644); +MODULE_PARM_DESC(ioat_ring_max_alloc_order, + "ioat2+: upper limit for ring size (default: 16)"); + +void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) { void * __iomem reg_base = ioat->base.reg_base; @@ -59,7 +64,7 @@ static void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); } -static void ioat2_issue_pending(struct dma_chan *chan) +void ioat2_issue_pending(struct dma_chan *chan) { struct ioat2_dma_chan *ioat = to_ioat2_chan(chan); @@ -92,7 +97,6 @@ static void ioat2_update_pending(struct ioat2_dma_chan *ioat) static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat) { - void __iomem *reg_base = ioat->base.reg_base; struct ioat_ring_ent *desc; struct ioat_dma_descriptor *hw; int idx; @@ -118,10 +122,7 @@ static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat) hw->src_addr = 0; hw->dst_addr = 0; async_tx_ack(&desc->txd); - writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, - reg_base + IOAT2_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->txd.phys) >> 32, - reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); + ioat2_set_chainaddr(ioat, desc->txd.phys); dump_desc_dbg(ioat, desc); __ioat2_issue_pending(ioat); } @@ -133,33 +134,98 @@ static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat) spin_unlock_bh(&ioat->ring_lock); } -static void ioat2_cleanup(struct ioat2_dma_chan *ioat); +static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) +{ + struct ioat_chan_common *chan = &ioat->base; + struct dma_async_tx_descriptor *tx; + struct ioat_ring_ent *desc; + bool seen_current = false; + u16 active; + int i; + + dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued); + + active = ioat2_ring_active(ioat); + for (i = 0; i < active && !seen_current; i++) { + prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1)); + desc = ioat2_get_ring_ent(ioat, ioat->tail + i); + tx = &desc->txd; + dump_desc_dbg(ioat, desc); + if (tx->cookie) { + ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); + chan->completed_cookie = tx->cookie; + tx->cookie = 0; + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; + } + } + + if (tx->phys == phys_complete) + seen_current = true; + } + ioat->tail += i; + BUG_ON(!seen_current); /* no active descs have written a completion? */ + + chan->last_completion = phys_complete; + if (ioat->head == ioat->tail) { + dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", + __func__); + clear_bit(IOAT_COMPLETION_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); + } +} /** - * ioat2_reset_part2 - reinit the channel after a reset + * ioat2_cleanup - clean finished descriptors (advance tail pointer) + * @chan: ioat channel to be cleaned up */ -static void ioat2_reset_part2(struct work_struct *work) +static void ioat2_cleanup(struct ioat2_dma_chan *ioat) { - struct ioat_chan_common *chan; - struct ioat2_dma_chan *ioat; + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; - chan = container_of(work, struct ioat_chan_common, work.work); - ioat = container_of(chan, struct ioat2_dma_chan, base); + prefetch(chan->completion); + + if (!spin_trylock_bh(&chan->cleanup_lock)) + return; + + if (!ioat_cleanup_preamble(chan, &phys_complete)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + if (!spin_trylock_bh(&ioat->ring_lock)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + __cleanup(ioat, phys_complete); + + spin_unlock_bh(&ioat->ring_lock); + spin_unlock_bh(&chan->cleanup_lock); +} + +void ioat2_cleanup_tasklet(unsigned long data) +{ + struct ioat2_dma_chan *ioat = (void *) data; - /* ensure that ->tail points to the stalled descriptor - * (ioat->pending is set to 2 at this point so no new - * descriptors will be issued while we perform this cleanup) - */ ioat2_cleanup(ioat); + writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); +} - spin_lock_bh(&chan->cleanup_lock); - spin_lock_bh(&ioat->ring_lock); +void __ioat2_restart_chan(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; /* set the tail to be re-issued */ ioat->issued = ioat->tail; ioat->dmacount = 0; + set_bit(IOAT_COMPLETION_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - dev_dbg(to_dev(&ioat->base), + dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x count: %#x\n", __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); @@ -167,188 +233,139 @@ static void ioat2_reset_part2(struct work_struct *work) struct ioat_ring_ent *desc; desc = ioat2_get_ring_ent(ioat, ioat->tail); - writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, - chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->txd.phys) >> 32, - chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); + ioat2_set_chainaddr(ioat, desc->txd.phys); __ioat2_issue_pending(ioat); } else __ioat2_start_null_desc(ioat); - - spin_unlock_bh(&ioat->ring_lock); - spin_unlock_bh(&chan->cleanup_lock); - - dev_info(to_dev(chan), - "chan%d reset - %d descs waiting, %d total desc\n", - chan_num(chan), ioat->dmacount, 1 << ioat->alloc_order); } -/** - * ioat2_reset_channel - restart a channel - * @ioat: IOAT DMA channel handle - */ -static void ioat2_reset_channel(struct ioat2_dma_chan *ioat) +int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo) { - u32 chansts, chanerr; - struct ioat_chan_common *chan = &ioat->base; - u16 active; - - spin_lock_bh(&ioat->ring_lock); - active = ioat2_ring_active(ioat); - spin_unlock_bh(&ioat->ring_lock); - if (!active) - return; - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - chansts = *chan->completion & IOAT_CHANSTS_DMA_TRANSFER_STATUS; - if (chanerr) { - dev_err(to_dev(chan), - "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", - chan_num(chan), chansts, chanerr); - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + unsigned long end = jiffies + tmo; + int err = 0; + u32 status; + + status = ioat_chansts(chan); + if (is_ioat_active(status) || is_ioat_idle(status)) + ioat_suspend(chan); + while (is_ioat_active(status) || is_ioat_idle(status)) { + if (end && time_after(jiffies, end)) { + err = -ETIMEDOUT; + break; + } + status = ioat_chansts(chan); + cpu_relax(); } - spin_lock_bh(&ioat->ring_lock); - ioat->pending = 2; - writeb(IOAT_CHANCMD_RESET, - chan->reg_base - + IOAT_CHANCMD_OFFSET(chan->device->version)); - spin_unlock_bh(&ioat->ring_lock); - schedule_delayed_work(&chan->work, RESET_DELAY); + return err; } -/** - * ioat2_chan_watchdog - watch for stuck channels - */ -static void ioat2_chan_watchdog(struct work_struct *work) +int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo) { - struct ioatdma_device *device = - container_of(work, struct ioatdma_device, work.work); - struct ioat2_dma_chan *ioat; - struct ioat_chan_common *chan; - u16 active; - int i; + unsigned long end = jiffies + tmo; + int err = 0; - dev_dbg(&device->pdev->dev, "%s\n", __func__); - - for (i = 0; i < device->common.chancnt; i++) { - chan = ioat_chan_by_index(device, i); - ioat = container_of(chan, struct ioat2_dma_chan, base); - - /* - * for version 2.0 if there are descriptors yet to be processed - * and the last completed hasn't changed since the last watchdog - * if they haven't hit the pending level - * issue the pending to push them through - * else - * try resetting the channel - */ - spin_lock_bh(&ioat->ring_lock); - active = ioat2_ring_active(ioat); - spin_unlock_bh(&ioat->ring_lock); - - if (active && - chan->last_completion && - chan->last_completion == chan->watchdog_completion) { - - if (ioat->pending == 1) - ioat2_issue_pending(&chan->common); - else { - ioat2_reset_channel(ioat); - chan->watchdog_completion = 0; - } - } else { - chan->last_compl_desc_addr_hw = 0; - chan->watchdog_completion = chan->last_completion; + ioat_reset(chan); + while (ioat_reset_pending(chan)) { + if (end && time_after(jiffies, end)) { + err = -ETIMEDOUT; + break; } - chan->watchdog_last_tcp_cookie = chan->watchdog_tcp_cookie; + cpu_relax(); } - schedule_delayed_work(&device->work, WATCHDOG_DELAY); + + return err; } -/** - * ioat2_cleanup - clean finished descriptors (advance tail pointer) - * @chan: ioat channel to be cleaned up - */ -static void ioat2_cleanup(struct ioat2_dma_chan *ioat) +static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) { struct ioat_chan_common *chan = &ioat->base; unsigned long phys_complete; - struct ioat_ring_ent *desc; - bool seen_current = false; - u16 active; - int i; - struct dma_async_tx_descriptor *tx; - prefetch(chan->completion); + ioat2_quiesce(chan, 0); + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); - spin_lock_bh(&chan->cleanup_lock); - phys_complete = ioat_get_current_completion(chan); - if (phys_complete == chan->last_completion) { - spin_unlock_bh(&chan->cleanup_lock); - /* - * perhaps we're stuck so hard that the watchdog can't go off? - * try to catch it after WATCHDOG_DELAY seconds - */ - if (chan->device->version < IOAT_VER_3_0) { - unsigned long tmo; + __ioat2_restart_chan(ioat); +} - tmo = chan->last_completion_time + HZ*WATCHDOG_DELAY; - if (time_after(jiffies, tmo)) { - ioat2_chan_watchdog(&(chan->device->work.work)); - chan->last_completion_time = jiffies; - } - } - return; - } - chan->last_completion_time = jiffies; +void ioat2_timer_event(unsigned long data) +{ + struct ioat2_dma_chan *ioat = (void *) data; + struct ioat_chan_common *chan = &ioat->base; - spin_lock_bh(&ioat->ring_lock); + spin_lock_bh(&chan->cleanup_lock); + if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { + unsigned long phys_complete; + u64 status; - dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", - __func__, ioat->head, ioat->tail, ioat->issued); + spin_lock_bh(&ioat->ring_lock); + status = ioat_chansts(chan); - active = ioat2_ring_active(ioat); - for (i = 0; i < active && !seen_current; i++) { - prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1)); - desc = ioat2_get_ring_ent(ioat, ioat->tail + i); - tx = &desc->txd; - dump_desc_dbg(ioat, desc); - if (tx->cookie) { - ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); - chan->completed_cookie = tx->cookie; - tx->cookie = 0; - if (tx->callback) { - tx->callback(tx->callback_param); - tx->callback = NULL; - } + /* when halted due to errors check for channel + * programming errors before advancing the completion state + */ + if (is_ioat_halted(status)) { + u32 chanerr; + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + dev_err(to_dev(chan), "%s: Channel halted (%x)\n", + __func__, chanerr); + BUG_ON(is_ioat_bug(chanerr)); } - if (tx->phys == phys_complete) - seen_current = true; - } - ioat->tail += i; - BUG_ON(!seen_current); /* no active descs have written a completion? */ - spin_unlock_bh(&ioat->ring_lock); + /* if we haven't made progress and we have already + * acknowledged a pending completion once, then be more + * forceful with a restart + */ + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) + ioat2_restart_channel(ioat); + else { + set_bit(IOAT_COMPLETION_ACK, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + } + spin_unlock_bh(&ioat->ring_lock); + } else { + u16 active; - chan->last_completion = phys_complete; + /* if the ring is idle, empty, and oversized try to step + * down the size + */ + spin_lock_bh(&ioat->ring_lock); + active = ioat2_ring_active(ioat); + if (active == 0 && ioat->alloc_order > ioat_get_alloc_order()) + reshape_ring(ioat, ioat->alloc_order-1); + spin_unlock_bh(&ioat->ring_lock); + /* keep shrinking until we get back to our minimum + * default size + */ + if (ioat->alloc_order > ioat_get_alloc_order()) + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); + } spin_unlock_bh(&chan->cleanup_lock); } -static void ioat2_cleanup_tasklet(unsigned long data) +static int ioat2_reset_hw(struct ioat_chan_common *chan) { - struct ioat2_dma_chan *ioat = (void *) data; + /* throw away whatever the channel was doing and get it initialized */ + u32 chanerr; - ioat2_cleanup(ioat); - writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); + ioat2_quiesce(chan, msecs_to_jiffies(100)); + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + + return ioat2_reset_sync(chan, msecs_to_jiffies(200)); } /** * ioat2_enumerate_channels - find and initialize the device's channels * @device: the device to be enumerated */ -static int ioat2_enumerate_channels(struct ioatdma_device *device) +int ioat2_enumerate_channels(struct ioatdma_device *device) { struct ioat2_dma_chan *ioat; struct device *dev = &device->pdev->dev; @@ -381,11 +398,15 @@ static int ioat2_enumerate_channels(struct ioatdma_device *device) break; ioat_init_channel(device, &ioat->base, i, - ioat2_reset_part2, - ioat2_cleanup_tasklet, + device->timer_fn, + device->cleanup_tasklet, (unsigned long) ioat); ioat->xfercap_log = xfercap_log; spin_lock_init(&ioat->ring_lock); + if (device->reset_hw(&ioat->base)) { + i = 0; + break; + } } dma->chancnt = i; return i; @@ -395,6 +416,7 @@ static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx) { struct dma_chan *c = tx->chan; struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_chan_common *chan = &ioat->base; dma_cookie_t cookie = c->cookie; cookie++; @@ -404,13 +426,15 @@ static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx) c->cookie = cookie; dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie); + if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state)) + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); ioat2_update_pending(ioat); spin_unlock_bh(&ioat->ring_lock); return cookie; } -static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan) +static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags) { struct ioat_dma_descriptor *hw; struct ioat_ring_ent *desc; @@ -418,16 +442,17 @@ static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan) dma_addr_t phys; dma = to_ioatdma_device(chan->device); - hw = pci_pool_alloc(dma->dma_pool, GFP_KERNEL, &phys); + hw = pci_pool_alloc(dma->dma_pool, flags, &phys); if (!hw) return NULL; memset(hw, 0, sizeof(*hw)); - desc = kzalloc(sizeof(*desc), GFP_KERNEL); + desc = kmem_cache_alloc(ioat2_cache, flags); if (!desc) { pci_pool_free(dma->dma_pool, hw, phys); return NULL; } + memset(desc, 0, sizeof(*desc)); dma_async_tx_descriptor_init(&desc->txd, chan); desc->txd.tx_submit = ioat2_tx_submit_unlock; @@ -442,20 +467,54 @@ static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *cha dma = to_ioatdma_device(chan->device); pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys); - kfree(desc); + kmem_cache_free(ioat2_cache, desc); +} + +static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags) +{ + struct ioat_ring_ent **ring; + int descs = 1 << order; + int i; + + if (order > ioat_get_max_alloc_order()) + return NULL; + + /* allocate the array to hold the software ring */ + ring = kcalloc(descs, sizeof(*ring), flags); + if (!ring) + return NULL; + for (i = 0; i < descs; i++) { + ring[i] = ioat2_alloc_ring_ent(c, flags); + if (!ring[i]) { + while (i--) + ioat2_free_ring_ent(ring[i], c); + kfree(ring); + return NULL; + } + set_desc_id(ring[i], i); + } + + /* link descs */ + for (i = 0; i < descs-1; i++) { + struct ioat_ring_ent *next = ring[i+1]; + struct ioat_dma_descriptor *hw = ring[i]->hw; + + hw->next = next->txd.phys; + } + ring[i]->hw->next = ring[0]->txd.phys; + + return ring; } /* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring * @chan: channel to be initialized */ -static int ioat2_alloc_chan_resources(struct dma_chan *c) +int ioat2_alloc_chan_resources(struct dma_chan *c) { struct ioat2_dma_chan *ioat = to_ioat2_chan(c); struct ioat_chan_common *chan = &ioat->base; struct ioat_ring_ent **ring; - u32 chanerr; - int descs; - int i; + int order; /* have we already been set up? */ if (ioat->ring) @@ -464,12 +523,6 @@ static int ioat2_alloc_chan_resources(struct dma_chan *c) /* Setup register to interrupt and write completion status on error */ writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - if (chanerr) { - dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr); - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); - } - /* allocate a completion writeback area */ /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ chan->completion = pci_pool_alloc(chan->device->completion_pool, @@ -483,32 +536,10 @@ static int ioat2_alloc_chan_resources(struct dma_chan *c) writel(((u64) chan->completion_dma) >> 32, chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); - ioat->alloc_order = ioat_get_alloc_order(); - descs = 1 << ioat->alloc_order; - - /* allocate the array to hold the software ring */ - ring = kcalloc(descs, sizeof(*ring), GFP_KERNEL); + order = ioat_get_alloc_order(); + ring = ioat2_alloc_ring(c, order, GFP_KERNEL); if (!ring) return -ENOMEM; - for (i = 0; i < descs; i++) { - ring[i] = ioat2_alloc_ring_ent(c); - if (!ring[i]) { - while (i--) - ioat2_free_ring_ent(ring[i], c); - kfree(ring); - return -ENOMEM; - } - set_desc_id(ring[i], i); - } - - /* link descs */ - for (i = 0; i < descs-1; i++) { - struct ioat_ring_ent *next = ring[i+1]; - struct ioat_dma_descriptor *hw = ring[i]->hw; - - hw->next = next->txd.phys; - } - ring[i]->hw->next = ring[0]->txd.phys; spin_lock_bh(&ioat->ring_lock); ioat->ring = ring; @@ -516,12 +547,120 @@ static int ioat2_alloc_chan_resources(struct dma_chan *c) ioat->issued = 0; ioat->tail = 0; ioat->pending = 0; + ioat->alloc_order = order; spin_unlock_bh(&ioat->ring_lock); tasklet_enable(&chan->cleanup_task); ioat2_start_null_desc(ioat); - return descs; + return 1 << ioat->alloc_order; +} + +bool reshape_ring(struct ioat2_dma_chan *ioat, int order) +{ + /* reshape differs from normal ring allocation in that we want + * to allocate a new software ring while only + * extending/truncating the hardware ring + */ + struct ioat_chan_common *chan = &ioat->base; + struct dma_chan *c = &chan->common; + const u16 curr_size = ioat2_ring_mask(ioat) + 1; + const u16 active = ioat2_ring_active(ioat); + const u16 new_size = 1 << order; + struct ioat_ring_ent **ring; + u16 i; + + if (order > ioat_get_max_alloc_order()) + return false; + + /* double check that we have at least 1 free descriptor */ + if (active == curr_size) + return false; + + /* when shrinking, verify that we can hold the current active + * set in the new ring + */ + if (active >= new_size) + return false; + + /* allocate the array to hold the software ring */ + ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT); + if (!ring) + return false; + + /* allocate/trim descriptors as needed */ + if (new_size > curr_size) { + /* copy current descriptors to the new ring */ + for (i = 0; i < curr_size; i++) { + u16 curr_idx = (ioat->tail+i) & (curr_size-1); + u16 new_idx = (ioat->tail+i) & (new_size-1); + + ring[new_idx] = ioat->ring[curr_idx]; + set_desc_id(ring[new_idx], new_idx); + } + + /* add new descriptors to the ring */ + for (i = curr_size; i < new_size; i++) { + u16 new_idx = (ioat->tail+i) & (new_size-1); + + ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT); + if (!ring[new_idx]) { + while (i--) { + u16 new_idx = (ioat->tail+i) & (new_size-1); + + ioat2_free_ring_ent(ring[new_idx], c); + } + kfree(ring); + return false; + } + set_desc_id(ring[new_idx], new_idx); + } + + /* hw link new descriptors */ + for (i = curr_size-1; i < new_size; i++) { + u16 new_idx = (ioat->tail+i) & (new_size-1); + struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)]; + struct ioat_dma_descriptor *hw = ring[new_idx]->hw; + + hw->next = next->txd.phys; + } + } else { + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *next; + + /* copy current descriptors to the new ring, dropping the + * removed descriptors + */ + for (i = 0; i < new_size; i++) { + u16 curr_idx = (ioat->tail+i) & (curr_size-1); + u16 new_idx = (ioat->tail+i) & (new_size-1); + + ring[new_idx] = ioat->ring[curr_idx]; + set_desc_id(ring[new_idx], new_idx); + } + + /* free deleted descriptors */ + for (i = new_size; i < curr_size; i++) { + struct ioat_ring_ent *ent; + + ent = ioat2_get_ring_ent(ioat, ioat->tail+i); + ioat2_free_ring_ent(ent, c); + } + + /* fix up hardware ring */ + hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw; + next = ring[(ioat->tail+new_size) & (new_size-1)]; + hw->next = next->txd.phys; + } + + dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n", + __func__, new_size); + + kfree(ioat->ring); + ioat->ring = ring; + ioat->alloc_order = order; + + return true; } /** @@ -530,12 +669,20 @@ static int ioat2_alloc_chan_resources(struct dma_chan *c) * @ioat: ioat2,3 channel (ring) to operate on * @num_descs: allocation length */ -static int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs) +int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs) { struct ioat_chan_common *chan = &ioat->base; spin_lock_bh(&ioat->ring_lock); - if (unlikely(ioat2_ring_space(ioat) < num_descs)) { + /* never allow the last descriptor to be consumed, we need at + * least one free at all times to allow for on-the-fly ring + * resizing. + */ + while (unlikely(ioat2_ring_space(ioat) <= num_descs)) { + if (reshape_ring(ioat, ioat->alloc_order + 1) && + ioat2_ring_space(ioat) > num_descs) + break; + if (printk_ratelimit()) dev_dbg(to_dev(chan), "%s: ring full! num_descs: %d (%x:%x:%x)\n", @@ -543,9 +690,20 @@ static int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_d ioat->issued); spin_unlock_bh(&ioat->ring_lock); - /* do direct reclaim in the allocation failure case */ - ioat2_cleanup(ioat); - + /* progress reclaim in the allocation failure case we + * may be called under bh_disabled so we need to trigger + * the timer event directly + */ + spin_lock_bh(&chan->cleanup_lock); + if (jiffies > chan->timer.expires && + timer_pending(&chan->timer)) { + struct ioatdma_device *device = chan->device; + + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + spin_unlock_bh(&chan->cleanup_lock); + device->timer_fn((unsigned long) ioat); + } else + spin_unlock_bh(&chan->cleanup_lock); return -ENOMEM; } @@ -556,7 +714,7 @@ static int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_d return 0; /* with ioat->ring_lock held */ } -static struct dma_async_tx_descriptor * +struct dma_async_tx_descriptor * ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, dma_addr_t dma_src, size_t len, unsigned long flags) { @@ -576,7 +734,8 @@ ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, /* pass */; else return NULL; - for (i = 0; i < num_descs; i++) { + i = 0; + do { size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log); desc = ioat2_get_ring_ent(ioat, idx + i); @@ -591,11 +750,12 @@ ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, dst += copy; src += copy; dump_desc_dbg(ioat, desc); - } + } while (++i < num_descs); desc->txd.flags = flags; desc->len = total_len; hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); hw->ctl_f.compl_write = 1; dump_desc_dbg(ioat, desc); /* we leave the channel locked to ensure in order submission */ @@ -607,11 +767,11 @@ ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, * ioat2_free_chan_resources - release all the descriptors * @chan: the channel to be cleaned */ -static void ioat2_free_chan_resources(struct dma_chan *c) +void ioat2_free_chan_resources(struct dma_chan *c) { struct ioat2_dma_chan *ioat = to_ioat2_chan(c); struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *ioatdma_device = chan->device; + struct ioatdma_device *device = chan->device; struct ioat_ring_ent *desc; const u16 total_descs = 1 << ioat->alloc_order; int descs; @@ -624,14 +784,9 @@ static void ioat2_free_chan_resources(struct dma_chan *c) return; tasklet_disable(&chan->cleanup_task); - ioat2_cleanup(ioat); - - /* Delay 100ms after reset to allow internal DMA logic to quiesce - * before removing DMA descriptor resources. - */ - writeb(IOAT_CHANCMD_RESET, - chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); - mdelay(100); + del_timer_sync(&chan->timer); + device->cleanup_tasklet((unsigned long) ioat); + device->reset_hw(chan); spin_lock_bh(&ioat->ring_lock); descs = ioat2_ring_space(ioat); @@ -654,8 +809,7 @@ static void ioat2_free_chan_resources(struct dma_chan *c) kfree(ioat->ring); ioat->ring = NULL; ioat->alloc_order = 0; - pci_pool_free(ioatdma_device->completion_pool, - chan->completion, + pci_pool_free(device->completion_pool, chan->completion, chan->completion_dma); spin_unlock_bh(&ioat->ring_lock); @@ -663,75 +817,66 @@ static void ioat2_free_chan_resources(struct dma_chan *c) chan->completion_dma = 0; ioat->pending = 0; ioat->dmacount = 0; - chan->watchdog_completion = 0; - chan->last_compl_desc_addr_hw = 0; - chan->watchdog_tcp_cookie = 0; - chan->watchdog_last_tcp_cookie = 0; } -static enum dma_status +enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie, dma_cookie_t *done, dma_cookie_t *used) { struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioatdma_device *device = ioat->base.device; if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) return DMA_SUCCESS; - ioat2_cleanup(ioat); + device->cleanup_tasklet((unsigned long) ioat); return ioat_is_complete(c, cookie, done, used); } -int ioat2_dma_probe(struct ioatdma_device *device, int dca) +static ssize_t ring_size_show(struct dma_chan *c, char *page) { - struct pci_dev *pdev = device->pdev; - struct dma_device *dma; - struct dma_chan *c; - struct ioat_chan_common *chan; - int err; - - device->enumerate_channels = ioat2_enumerate_channels; - dma = &device->common; - dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; - dma->device_issue_pending = ioat2_issue_pending; - dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; - dma->device_free_chan_resources = ioat2_free_chan_resources; - dma->device_is_tx_complete = ioat2_is_complete; - - err = ioat_probe(device); - if (err) - return err; - ioat_set_tcp_copy_break(2048); - - list_for_each_entry(c, &dma->channels, device_node) { - chan = to_chan_common(c); - writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU, - chan->reg_base + IOAT_DCACTRL_OFFSET); - } + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - err = ioat_register(device); - if (err) - return err; - if (dca) - device->dca = ioat2_dca_init(pdev, device->reg_base); + return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1); +} +static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); - INIT_DELAYED_WORK(&device->work, ioat2_chan_watchdog); - schedule_delayed_work(&device->work, WATCHDOG_DELAY); +static ssize_t ring_active_show(struct dma_chan *c, char *page) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - return err; + /* ...taken outside the lock, no need to be precise */ + return sprintf(page, "%d\n", ioat2_ring_active(ioat)); } - -int ioat3_dma_probe(struct ioatdma_device *device, int dca) +static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); + +static struct attribute *ioat2_attrs[] = { + &ring_size_attr.attr, + &ring_active_attr.attr, + &ioat_cap_attr.attr, + &ioat_version_attr.attr, + NULL, +}; + +struct kobj_type ioat2_ktype = { + .sysfs_ops = &ioat_sysfs_ops, + .default_attrs = ioat2_attrs, +}; + +int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca) { struct pci_dev *pdev = device->pdev; struct dma_device *dma; struct dma_chan *c; struct ioat_chan_common *chan; int err; - u16 dev_id; device->enumerate_channels = ioat2_enumerate_channels; + device->reset_hw = ioat2_reset_hw; + device->cleanup_tasklet = ioat2_cleanup_tasklet; + device->timer_fn = ioat2_timer_event; + device->self_test = ioat_dma_self_test; dma = &device->common; dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; dma->device_issue_pending = ioat2_issue_pending; @@ -739,35 +884,25 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) dma->device_free_chan_resources = ioat2_free_chan_resources; dma->device_is_tx_complete = ioat2_is_complete; - /* -= IOAT ver.3 workarounds =- */ - /* Write CHANERRMSK_INT with 3E07h to mask out the errors - * that can cause stability issues for IOAT ver.3 - */ - pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); - - /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit - * (workaround for spurious config parity error after restart) - */ - pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); - if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) - pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); - err = ioat_probe(device); if (err) return err; - ioat_set_tcp_copy_break(262144); + ioat_set_tcp_copy_break(2048); list_for_each_entry(c, &dma->channels, device_node) { chan = to_chan_common(c); - writel(IOAT_DMA_DCA_ANY_CPU, + writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU, chan->reg_base + IOAT_DCACTRL_OFFSET); } err = ioat_register(device); if (err) return err; + + ioat_kobject_add(device, &ioat2_ktype); + if (dca) - device->dca = ioat3_dca_init(pdev, device->reg_base); + device->dca = ioat2_dca_init(pdev, device->reg_base); return err; }