Revert "b43: Enforce DMA descriptor memory constraints"
diff --git a/drivers/net/wireless/b43/dma.c b/drivers/net/wireless/b43/dma.c
index f3552ac..88d1fd0 100644
--- a/drivers/net/wireless/b43/dma.c
+++ b/drivers/net/wireless/b43/dma.c
 #include <linux/pci.h>
 #include <linux/delay.h>
 #include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <asm/div64.h>
+
+
+/* Required number of TX DMA slots per TX frame.
+ * This currently is 2, because we put the header and the ieee80211 frame
+ * into separate slots. */
+#define TX_SLOTS_PER_FRAME     2
+
 
 /* 32bit DMA ops. */
 static
@@ -71,8 +80,7 @@ static void op32_fill_descriptor(struct b43_dmaring *ring,
        addrext = (u32) (dmaaddr & SSB_DMA_TRANSLATION_MASK)
            >> SSB_DMA_TRANSLATION_SHIFT;
        addr |= ssb_dma_translation(ring->dev->dev);
-       ctl = (bufsize - ring->frameoffset)
-           & B43_DMA32_DCTL_BYTECNT;
+       ctl = bufsize & B43_DMA32_DCTL_BYTECNT;
        if (slot == ring->nr_slots - 1)
                ctl |= B43_DMA32_DCTL_DTABLEEND;
        if (start)
@@ -174,8 +182,7 @@ static void op64_fill_descriptor(struct b43_dmaring *ring,
                ctl0 |= B43_DMA64_DCTL0_FRAMEEND;
        if (irq)
                ctl0 |= B43_DMA64_DCTL0_IRQ;
-       ctl1 |= (bufsize - ring->frameoffset)
-           & B43_DMA64_DCTL1_BYTECNT;
+       ctl1 |= bufsize & B43_DMA64_DCTL1_BYTECNT;
        ctl1 |= (addrext << B43_DMA64_DCTL1_ADDREXT_SHIFT)
            & B43_DMA64_DCTL1_ADDREXT_MASK;
 
@@ -289,55 +296,7 @@ static inline int request_slot(struct b43_dmaring *ring)
        return slot;
 }
 
-/* Mac80211-queue to b43-ring mapping */
-static struct b43_dmaring *priority_to_txring(struct b43_wldev *dev,
-                                             int queue_priority)
-{
-       struct b43_dmaring *ring;
-
-/*FIXME: For now we always run on TX-ring-1 */
-       return dev->dma.tx_ring1;
-
-       /* 0 = highest priority */
-       switch (queue_priority) {
-       default:
-               B43_WARN_ON(1);
-               /* fallthrough */
-       case 0:
-               ring = dev->dma.tx_ring3;
-               break;
-       case 1:
-               ring = dev->dma.tx_ring2;
-               break;
-       case 2:
-               ring = dev->dma.tx_ring1;
-               break;
-       case 3:
-               ring = dev->dma.tx_ring0;
-               break;
-       case 4:
-               ring = dev->dma.tx_ring4;
-               break;
-       case 5:
-               ring = dev->dma.tx_ring5;
-               break;
-       }
-
-       return ring;
-}
-
-/* Bcm43xx-ring to mac80211-queue mapping */
-static inline int txring_to_priority(struct b43_dmaring *ring)
-{
-       static const u8 idx_to_prio[] = { 3, 2, 1, 0, 4, 5, };
-
-/*FIXME: have only one queue, for now */
-       return 0;
-
-       return idx_to_prio[ring->index];
-}
-
-u16 b43_dmacontroller_base(int dma64bit, int controller_idx)
+static u16 b43_dmacontroller_base(enum b43_dmatype type, int controller_idx)
 {
        static const u16 map64[] = {
                B43_MMIO_DMA64_BASE0,
@@ -356,7 +315,7 @@ u16 b43_dmacontroller_base(int dma64bit, int controller_idx)
                B43_MMIO_DMA32_BASE5,
        };
 
-       if (dma64bit) {
+       if (type == B43_DMA_64BIT) {
                B43_WARN_ON(!(controller_idx >= 0 &&
                              controller_idx < ARRAY_SIZE(map64)));
                return map64[controller_idx];
@@ -373,11 +332,11 @@ static inline
        dma_addr_t dmaaddr;
 
        if (tx) {
-               dmaaddr = dma_map_single(ring->dev->dev->dev,
-                                        buf, len, DMA_TO_DEVICE);
+               dmaaddr = ssb_dma_map_single(ring->dev->dev,
+                                            buf, len, DMA_TO_DEVICE);
        } else {
-               dmaaddr = dma_map_single(ring->dev->dev->dev,
-                                        buf, len, DMA_FROM_DEVICE);
+               dmaaddr = ssb_dma_map_single(ring->dev->dev,
+                                            buf, len, DMA_FROM_DEVICE);
        }
 
        return dmaaddr;
@@ -388,10 +347,11 @@ static inline
                          dma_addr_t addr, size_t len, int tx)
 {
        if (tx) {
-               dma_unmap_single(ring->dev->dev->dev, addr, len, DMA_TO_DEVICE);
+               ssb_dma_unmap_single(ring->dev->dev,
+                                    addr, len, DMA_TO_DEVICE);
        } else {
-               dma_unmap_single(ring->dev->dev->dev,
-                                addr, len, DMA_FROM_DEVICE);
+               ssb_dma_unmap_single(ring->dev->dev,
+                                    addr, len, DMA_FROM_DEVICE);
        }
 }
 
@@ -400,8 +360,8 @@ static inline
                                 dma_addr_t addr, size_t len)
 {
        B43_WARN_ON(ring->tx);
-       dma_sync_single_for_cpu(ring->dev->dev->dev,
-                               addr, len, DMA_FROM_DEVICE);
+       ssb_dma_sync_single_for_cpu(ring->dev->dev,
+                                   addr, len, DMA_FROM_DEVICE);
 }
 
 static inline
@@ -409,8 +369,8 @@ static inline
                                    dma_addr_t addr, size_t len)
 {
        B43_WARN_ON(ring->tx);
-       dma_sync_single_for_device(ring->dev->dev->dev,
-                                  addr, len, DMA_FROM_DEVICE);
+       ssb_dma_sync_single_for_device(ring->dev->dev,
+                                      addr, len, DMA_FROM_DEVICE);
 }
 
 static inline
@@ -425,7 +385,6 @@ static inline
 
 static int alloc_ringmemory(struct b43_dmaring *ring)
 {
-       struct device *dev = ring->dev->dev->dev;
        gfp_t flags = GFP_KERNEL;
 
        /* The specs call for 4K buffers for 30- and 32-bit DMA with 4K
@@ -436,11 +395,14 @@ static int alloc_ringmemory(struct b43_dmaring *ring)
         * For unknown reasons - possibly a hardware error - the BCM4311 rev
         * 02, which uses 64-bit DMA, needs the ring buffer in very low memory,
         * which accounts for the GFP_DMA flag below.
+        *
+        * The flags here must match the flags in free_ringmemory below!
         */
-       if (ring->dma64)
+       if (ring->type == B43_DMA_64BIT)
                flags |= GFP_DMA;
-       ring->descbase = dma_alloc_coherent(dev, B43_DMA_RINGMEMSIZE,
-                                           &(ring->dmabase), flags);
+       ring->descbase = ssb_dma_alloc_consistent(ring->dev->dev,
+                                                 B43_DMA_RINGMEMSIZE,
+                                                 &(ring->dmabase), flags);
        if (!ring->descbase) {
                b43err(ring->dev->wl, "DMA ringmemory allocation failed\n");
                return -ENOMEM;
@@ -452,14 +414,18 @@ static int alloc_ringmemory(struct b43_dmaring *ring)
 
 static void free_ringmemory(struct b43_dmaring *ring)
 {
-       struct device *dev = ring->dev->dev->dev;
+       gfp_t flags = GFP_KERNEL;
+
+       if (ring->type == B43_DMA_64BIT)
+               flags |= GFP_DMA;
 
-       dma_free_coherent(dev, B43_DMA_RINGMEMSIZE,
-                         ring->descbase, ring->dmabase);
+       ssb_dma_free_consistent(ring->dev->dev, B43_DMA_RINGMEMSIZE,
+                               ring->descbase, ring->dmabase, flags);
 }
 
 /* Reset the RX DMA channel */
-int b43_dmacontroller_rx_reset(struct b43_wldev *dev, u16 mmio_base, int dma64)
+static int b43_dmacontroller_rx_reset(struct b43_wldev *dev, u16 mmio_base,
+                                     enum b43_dmatype type)
 {
        int i;
        u32 value;
@@ -467,12 +433,13 @@ int b43_dmacontroller_rx_reset(struct b43_wldev *dev, u16 mmio_base, int dma64)
 
        might_sleep();
 
-       offset = dma64 ? B43_DMA64_RXCTL : B43_DMA32_RXCTL;
+       offset = (type == B43_DMA_64BIT) ? B43_DMA64_RXCTL : B43_DMA32_RXCTL;
        b43_write32(dev, mmio_base + offset, 0);
        for (i = 0; i < 10; i++) {
-               offset = dma64 ? B43_DMA64_RXSTATUS : B43_DMA32_RXSTATUS;
+               offset = (type == B43_DMA_64BIT) ? B43_DMA64_RXSTATUS :
+                                                  B43_DMA32_RXSTATUS;
                value = b43_read32(dev, mmio_base + offset);
-               if (dma64) {
+               if (type == B43_DMA_64BIT) {
                        value &= B43_DMA64_RXSTAT;
                        if (value == B43_DMA64_RXSTAT_DISABLED) {
                                i = -1;
@@ -496,7 +463,8 @@ int b43_dmacontroller_rx_reset(struct b43_wldev *dev, u16 mmio_base, int dma64)
 }
 
 /* Reset the TX DMA channel */
-int b43_dmacontroller_tx_reset(struct b43_wldev *dev, u16 mmio_base, int dma64)
+static int b43_dmacontroller_tx_reset(struct b43_wldev *dev, u16 mmio_base,
+                                     enum b43_dmatype type)
 {
        int i;
        u32 value;
@@ -505,9 +473,10 @@ int b43_dmacontroller_tx_reset(struct b43_wldev *dev, u16 mmio_base, int dma64)
        might_sleep();
 
        for (i = 0; i < 10; i++) {
-               offset = dma64 ? B43_DMA64_TXSTATUS : B43_DMA32_TXSTATUS;
+               offset = (type == B43_DMA_64BIT) ? B43_DMA64_TXSTATUS :
+                                                  B43_DMA32_TXSTATUS;
                value = b43_read32(dev, mmio_base + offset);
-               if (dma64) {
+               if (type == B43_DMA_64BIT) {
                        value &= B43_DMA64_TXSTAT;
                        if (value == B43_DMA64_TXSTAT_DISABLED ||
                            value == B43_DMA64_TXSTAT_IDLEWAIT ||
@@ -522,12 +491,13 @@ int b43_dmacontroller_tx_reset(struct b43_wldev *dev, u16 mmio_base, int dma64)
                }
                msleep(1);
        }
-       offset = dma64 ? B43_DMA64_TXCTL : B43_DMA32_TXCTL;
+       offset = (type == B43_DMA_64BIT) ? B43_DMA64_TXCTL : B43_DMA32_TXCTL;
        b43_write32(dev, mmio_base + offset, 0);
        for (i = 0; i < 10; i++) {
-               offset = dma64 ? B43_DMA64_TXSTATUS : B43_DMA32_TXSTATUS;
+               offset = (type == B43_DMA_64BIT) ? B43_DMA64_TXSTATUS :
+                                                  B43_DMA32_TXSTATUS;
                value = b43_read32(dev, mmio_base + offset);
-               if (dma64) {
+               if (type == B43_DMA_64BIT) {
                        value &= B43_DMA64_TXSTAT;
                        if (value == B43_DMA64_TXSTAT_DISABLED) {
                                i = -1;
@@ -552,12 +522,65 @@ int b43_dmacontroller_tx_reset(struct b43_wldev *dev, u16 mmio_base, int dma64)
        return 0;
 }
 
+/* Check if a DMA mapping address is invalid. */
+static bool b43_dma_mapping_error(struct b43_dmaring *ring,
+                                 dma_addr_t addr,
+                                 size_t buffersize, bool dma_to_device)
+{
+       if (unlikely(ssb_dma_mapping_error(ring->dev->dev, addr)))
+               return 1;
+
+       switch (ring->type) {
+       case B43_DMA_30BIT:
+               if ((u64)addr + buffersize > (1ULL << 30))
+                       goto address_error;
+               break;
+       case B43_DMA_32BIT:
+               if ((u64)addr + buffersize > (1ULL << 32))
+                       goto address_error;
+               break;
+       case B43_DMA_64BIT:
+               /* Currently we can't have addresses beyond
+                * 64bit in the kernel. */
+               break;
+       }
+
+       /* The address is OK. */
+       return 0;
+
+address_error:
+       /* We can't support this address. Unmap it again. */
+       unmap_descbuffer(ring, addr, buffersize, dma_to_device);
+
+       return 1;
+}
+
+static bool b43_rx_buffer_is_poisoned(struct b43_dmaring *ring, struct sk_buff *skb)
+{
+       unsigned char *f = skb->data + ring->frameoffset;
+
+       return ((f[0] & f[1] & f[2] & f[3] & f[4] & f[5] & f[6] & f[7]) == 0xFF);
+}
+
+static void b43_poison_rx_buffer(struct b43_dmaring *ring, struct sk_buff *skb)
+{
+       struct b43_rxhdr_fw4 *rxhdr;
+       unsigned char *frame;
+
+       /* This poisons the RX buffer to detect DMA failures. */
+
+       rxhdr = (struct b43_rxhdr_fw4 *)(skb->data);
+       rxhdr->frame_len = 0;
+
+       B43_WARN_ON(ring->rx_buffersize < ring->frameoffset + sizeof(struct b43_plcp_hdr6) + 2);
+       frame = skb->data + ring->frameoffset;
+       memset(frame, 0xFF, sizeof(struct b43_plcp_hdr6) + 2 /* padding */);
+}
+
 static int setup_rx_descbuffer(struct b43_dmaring *ring,
                               struct b43_dmadesc_generic *desc,
                               struct b43_dmadesc_meta *meta, gfp_t gfp_flags)
 {
-       struct b43_rxhdr_fw4 *rxhdr;
-       struct b43_hwtxstatus *txstat;
        dma_addr_t dmaaddr;
        struct sk_buff *skb;
 
@@ -566,8 +589,9 @@ static int setup_rx_descbuffer(struct b43_dmaring *ring,
        skb = __dev_alloc_skb(ring->rx_buffersize, gfp_flags);
        if (unlikely(!skb))
                return -ENOMEM;
+       b43_poison_rx_buffer(ring, skb);
        dmaaddr = map_descbuffer(ring, skb->data, ring->rx_buffersize, 0);
-       if (dma_mapping_error(dmaaddr)) {
+       if (b43_dma_mapping_error(ring, dmaaddr, ring->rx_buffersize, 0)) {
                /* ugh. try to realloc in zone_dma */
                gfp_flags |= GFP_DMA;
 
@@ -576,13 +600,14 @@ static int setup_rx_descbuffer(struct b43_dmaring *ring,
                skb = __dev_alloc_skb(ring->rx_buffersize, gfp_flags);
                if (unlikely(!skb))
                        return -ENOMEM;
+               b43_poison_rx_buffer(ring, skb);
                dmaaddr = map_descbuffer(ring, skb->data,
                                         ring->rx_buffersize, 0);
-       }
-
-       if (dma_mapping_error(dmaaddr)) {
-               dev_kfree_skb_any(skb);
-               return -EIO;
+               if (b43_dma_mapping_error(ring, dmaaddr, ring->rx_buffersize, 0)) {
+                       b43err(ring->dev->wl, "RX DMA buffer allocation failed\n");
+                       dev_kfree_skb_any(skb);
+                       return -EIO;
+               }
        }
 
        meta->skb = skb;
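
The b43_dma_mapping_error() check used above rejects any mapping whose buffer does not fit entirely inside the engine's address window (30-, 32- or 64-bit engines). The rule reduces to simple range arithmetic; a minimal standalone sketch with a hypothetical helper name, not part of this patch:

/* Illustrative sketch only -- hypothetical helper, not from dma.c. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static bool fits_dma_window(uint64_t addr, size_t len, unsigned int bits)
{
	if (bits >= 64)
		return true;	/* a 64-bit engine can reach any kernel address */
	/* Usable only if the last byte still lies below 2^bits. */
	return addr + (uint64_t)len <= (1ULL << bits);
}

/* e.g. fits_dma_window(0x3FFFF000, 0x2000, 30) is false: the buffer
 * crosses the 1 GiB limit of a 30-bit engine. */
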
@@ -590,11 +615,6 @@ static int setup_rx_descbuffer(struct b43_dmaring *ring,
        ring->ops->fill_descriptor(ring, desc, dmaaddr,
                                   ring->rx_buffersize, 0, 0, 0);
 
-       rxhdr = (struct b43_rxhdr_fw4 *)(skb->data);
-       rxhdr->frame_len = 0;
-       txstat = (struct b43_hwtxstatus *)(skb->data);
-       txstat->cookie = 0;
-
        return 0;
 }
 
@@ -645,7 +665,7 @@ static int dmacontroller_setup(struct b43_dmaring *ring)
        u32 trans = ssb_dma_translation(ring->dev->dev);
 
        if (ring->tx) {
-               if (ring->dma64) {
+               if (ring->type == B43_DMA_64BIT) {
                        u64 ringbase = (u64) (ring->dmabase);
 
                        addrext = ((ringbase >> 32) & SSB_DMA_TRANSLATION_MASK)
@@ -677,7 +697,7 @@ static int dmacontroller_setup(struct b43_dmaring *ring)
                err = alloc_initial_descbuffers(ring);
                if (err)
                        goto out;
-               if (ring->dma64) {
+               if (ring->type == B43_DMA_64BIT) {
                        u64 ringbase = (u64) (ring->dmabase);
 
                        addrext = ((ringbase >> 32) & SSB_DMA_TRANSLATION_MASK)
@@ -722,16 +742,16 @@ static void dmacontroller_cleanup(struct b43_dmaring *ring)
 {
        if (ring->tx) {
                b43_dmacontroller_tx_reset(ring->dev, ring->mmio_base,
-                                          ring->dma64);
-               if (ring->dma64) {
+                                          ring->type);
+               if (ring->type == B43_DMA_64BIT) {
                        b43_dma_write(ring, B43_DMA64_TXRINGLO, 0);
                        b43_dma_write(ring, B43_DMA64_TXRINGHI, 0);
                } else
                        b43_dma_write(ring, B43_DMA32_TXRING, 0);
        } else {
                b43_dmacontroller_rx_reset(ring->dev, ring->mmio_base,
-                                          ring->dma64);
-               if (ring->dma64) {
+                                          ring->type);
+               if (ring->type == B43_DMA_64BIT) {
                        b43_dma_write(ring, B43_DMA64_RXRINGLO, 0);
                        b43_dma_write(ring, B43_DMA64_RXRINGHI, 0);
                } else
@@ -750,7 +770,7 @@ static void free_all_descbuffers(struct b43_dmaring *ring)
        for (i = 0; i < ring->nr_slots; i++) {
                desc = ring->ops->idx2desc(ring, i, &meta);
 
-               if (!meta->skb) {
+               if (!meta->skb || b43_dma_ptr_is_poisoned(meta->skb)) {
                        B43_WARN_ON(!ring->tx);
                        continue;
                }
@@ -772,82 +792,59 @@ static u64 supported_dma_mask(struct b43_wldev *dev)
 
        tmp = b43_read32(dev, SSB_TMSHIGH);
        if (tmp & SSB_TMSHIGH_DMA64)
-               return DMA_64BIT_MASK;
+               return DMA_BIT_MASK(64);
        mmio_base = b43_dmacontroller_base(0, 0);
        b43_write32(dev, mmio_base + B43_DMA32_TXCTL, B43_DMA32_TXADDREXT_MASK);
        tmp = b43_read32(dev, mmio_base + B43_DMA32_TXCTL);
        if (tmp & B43_DMA32_TXADDREXT_MASK)
-               return DMA_32BIT_MASK;
+               return DMA_BIT_MASK(32);
 
-       return DMA_30BIT_MASK;
+       return DMA_BIT_MASK(30);
+}
+
+static enum b43_dmatype dma_mask_to_engine_type(u64 dmamask)
+{
+       if (dmamask == DMA_BIT_MASK(30))
+               return B43_DMA_30BIT;
+       if (dmamask == DMA_BIT_MASK(32))
+               return B43_DMA_32BIT;
+       if (dmamask == DMA_BIT_MASK(64))
+               return B43_DMA_64BIT;
+       B43_WARN_ON(1);
+       return B43_DMA_30BIT;
 }
 
 /* Main initialization function. */
 static
 struct b43_dmaring *b43_setup_dmaring(struct b43_wldev *dev,
                                      int controller_index,
-                                     int for_tx, int dma64)
+                                     int for_tx,
+                                     enum b43_dmatype type)
 {
        struct b43_dmaring *ring;
-       int err;
-       int nr_slots;
+       int i, err;
        dma_addr_t dma_test;
 
        ring = kzalloc(sizeof(*ring), GFP_KERNEL);
        if (!ring)
                goto out;
 
-       nr_slots = B43_RXRING_SLOTS;
+       ring->nr_slots = B43_RXRING_SLOTS;
        if (for_tx)
-               nr_slots = B43_TXRING_SLOTS;
+               ring->nr_slots = B43_TXRING_SLOTS;
 
-       ring->meta = kcalloc(nr_slots, sizeof(struct b43_dmadesc_meta),
+       ring->meta = kcalloc(ring->nr_slots, sizeof(struct b43_dmadesc_meta),
                             GFP_KERNEL);
        if (!ring->meta)
                goto err_kfree_ring;
-       if (for_tx) {
-               ring->txhdr_cache = kcalloc(nr_slots,
-                                           sizeof(struct b43_txhdr_fw4),
-                                           GFP_KERNEL);
-               if (!ring->txhdr_cache)
-                       goto err_kfree_meta;
-
-               /* test for ability to dma to txhdr_cache */
-               dma_test = dma_map_single(dev->dev->dev,
-                                         ring->txhdr_cache,
-                                         sizeof(struct b43_txhdr_fw4),
-                                         DMA_TO_DEVICE);
-
-               if (dma_mapping_error(dma_test)) {
-                       /* ugh realloc */
-                       kfree(ring->txhdr_cache);
-                       ring->txhdr_cache = kcalloc(nr_slots,
-                                                   sizeof(struct
-                                                          b43_txhdr_fw4),
-                                                   GFP_KERNEL | GFP_DMA);
-                       if (!ring->txhdr_cache)
-                               goto err_kfree_meta;
-
-                       dma_test = dma_map_single(dev->dev->dev,
-                                                 ring->txhdr_cache,
-                                                 sizeof(struct b43_txhdr_fw4),
-                                                 DMA_TO_DEVICE);
-
-                       if (dma_mapping_error(dma_test))
-                               goto err_kfree_txhdr_cache;
-               }
-
-               dma_unmap_single(dev->dev->dev,
-                                dma_test, sizeof(struct b43_txhdr_fw4),
-                                DMA_TO_DEVICE);
-       }
+       for (i = 0; i < ring->nr_slots; i++)
+               ring->meta->skb = B43_DMA_PTR_POISON;
 
+       ring->type = type;
        ring->dev = dev;
-       ring->nr_slots = nr_slots;
-       ring->mmio_base = b43_dmacontroller_base(dma64, controller_index);
+       ring->mmio_base = b43_dmacontroller_base(type, controller_index);
        ring->index = controller_index;
-       ring->dma64 = !!dma64;
-       if (dma64)
+       if (type == B43_DMA_64BIT)
                ring->ops = &dma64_ops;
        else
                ring->ops = &dma32_ops;
@@ -858,17 +855,58 @@ struct b43_dmaring *b43_setup_dmaring(struct b43_wldev *dev,
                if (ring->index == 0) {
                        ring->rx_buffersize = B43_DMA0_RX_BUFFERSIZE;
                        ring->frameoffset = B43_DMA0_RX_FRAMEOFFSET;
-               } else if (ring->index == 3) {
-                       ring->rx_buffersize = B43_DMA3_RX_BUFFERSIZE;
-                       ring->frameoffset = B43_DMA3_RX_FRAMEOFFSET;
                } else
                        B43_WARN_ON(1);
        }
-       spin_lock_init(&ring->lock);
 #ifdef CONFIG_B43_DEBUG
        ring->last_injected_overflow = jiffies;
 #endif
 
+       if (for_tx) {
+               /* Assumption: B43_TXRING_SLOTS can be divided by TX_SLOTS_PER_FRAME */
+               BUILD_BUG_ON(B43_TXRING_SLOTS % TX_SLOTS_PER_FRAME != 0);
+
+               ring->txhdr_cache = kcalloc(ring->nr_slots / TX_SLOTS_PER_FRAME,
+                                           b43_txhdr_size(dev),
+                                           GFP_KERNEL);
+               if (!ring->txhdr_cache)
+                       goto err_kfree_meta;
+
+               /* test for ability to dma to txhdr_cache */
+               dma_test = ssb_dma_map_single(dev->dev,
+                                             ring->txhdr_cache,
+                                             b43_txhdr_size(dev),
+                                             DMA_TO_DEVICE);
+
+               if (b43_dma_mapping_error(ring, dma_test,
+                                         b43_txhdr_size(dev), 1)) {
+                       /* ugh realloc */
+                       kfree(ring->txhdr_cache);
+                       ring->txhdr_cache = kcalloc(ring->nr_slots / TX_SLOTS_PER_FRAME,
+                                                   b43_txhdr_size(dev),
+                                                   GFP_KERNEL | GFP_DMA);
+                       if (!ring->txhdr_cache)
+                               goto err_kfree_meta;
+
+                       dma_test = ssb_dma_map_single(dev->dev,
+                                                     ring->txhdr_cache,
+                                                     b43_txhdr_size(dev),
+                                                     DMA_TO_DEVICE);
+
+                       if (b43_dma_mapping_error(ring, dma_test,
+                                                 b43_txhdr_size(dev), 1)) {
+
+                               b43err(dev->wl,
+                                      "TXHDR DMA allocation failed\n");
+                               goto err_kfree_txhdr_cache;
+                       }
+               }
+
+               ssb_dma_unmap_single(dev->dev,
+                                    dma_test, b43_txhdr_size(dev),
+                                    DMA_TO_DEVICE);
+       }
+
        err = alloc_ringmemory(ring);
        if (err)
                goto err_kfree_txhdr_cache;
@@ -891,16 +929,52 @@ struct b43_dmaring *b43_setup_dmaring(struct b43_wldev *dev,
        goto out;
 }
 
+#define divide(a, b)   ({      \
+       typeof(a) __a = a;      \
+       do_div(__a, b);         \
+       __a;                    \
+  })
+
+#define modulo(a, b)   ({      \
+       typeof(a) __a = a;      \
+       do_div(__a, b);         \
+  })
+
 /* Main cleanup function. */
-static void b43_destroy_dmaring(struct b43_dmaring *ring)
+static void b43_destroy_dmaring(struct b43_dmaring *ring,
+                               const char *ringname)
 {
        if (!ring)
                return;
 
-       b43dbg(ring->dev->wl, "DMA-%s 0x%04X (%s) max used slots: %d/%d\n",
-              (ring->dma64) ? "64" : "32",
-              ring->mmio_base,
-              (ring->tx) ? "TX" : "RX", ring->max_used_slots, ring->nr_slots);
+#ifdef CONFIG_B43_DEBUG
+       {
+               /* Print some statistics. */
+               u64 failed_packets = ring->nr_failed_tx_packets;
+               u64 succeed_packets = ring->nr_succeed_tx_packets;
+               u64 nr_packets = failed_packets + succeed_packets;
+               u64 permille_failed = 0, average_tries = 0;
+
+               if (nr_packets)
+                       permille_failed = divide(failed_packets * 1000, nr_packets);
+               if (nr_packets)
+                       average_tries = divide(ring->nr_total_packet_tries * 100, nr_packets);
+
+               b43dbg(ring->dev->wl, "DMA-%u %s: "
+                      "Used slots %d/%d, Failed frames %llu/%llu = %llu.%01llu%%, "
+                      "Average tries %llu.%02llu\n",
+                      (unsigned int)(ring->type), ringname,
+                      ring->max_used_slots,
+                      ring->nr_slots,
+                      (unsigned long long)failed_packets,
+                      (unsigned long long)nr_packets,
+                      (unsigned long long)divide(permille_failed, 10),
+                      (unsigned long long)modulo(permille_failed, 10),
+                      (unsigned long long)divide(average_tries, 100),
+                      (unsigned long long)modulo(average_tries, 100));
+       }
+#endif /* DEBUG */
+
        /* Device IRQs are disabled prior entering this function,
         * so no need to take care of concurrency with rx handler stuff.
         */
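
The divide() and modulo() helpers above exist because a plain 64-bit '/' or '%' is not available on every 32-bit kernel target; do_div() divides its first argument in place and evaluates to the remainder. The statistics they feed are ordinary permille arithmetic, shown here with plain C operators and made-up counter values, not part of this patch:

/* Illustrative sketch only -- userspace arithmetic with plain operators. */
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	uint64_t failed = 37, succeeded = 4963;	/* hypothetical counters */
	uint64_t total = failed + succeeded;
	uint64_t permille = total ? (failed * 1000) / total : 0;

	/* Same "Failed frames x/y = a.b%" layout as the b43dbg() line above. */
	printf("Failed frames %" PRIu64 "/%" PRIu64 " = %" PRIu64 ".%01" PRIu64 "%%\n",
	       failed, total, permille / 10, permille % 10);
	return 0;
}
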
@@ -913,168 +987,141 @@ static void b43_destroy_dmaring(struct b43_dmaring *ring)
        kfree(ring);
 }
 
+#define destroy_ring(dma, ring) do {                           \
+       b43_destroy_dmaring((dma)->ring, __stringify(ring));    \
+       (dma)->ring = NULL;                                     \
+    } while (0)
+
 void b43_dma_free(struct b43_wldev *dev)
 {
        struct b43_dma *dma;
 
-       if (b43_using_pio(dev))
+       if (b43_using_pio_transfers(dev))
                return;
        dma = &dev->dma;
 
-       b43_destroy_dmaring(dma->rx_ring3);
-       dma->rx_ring3 = NULL;
-       b43_destroy_dmaring(dma->rx_ring0);
-       dma->rx_ring0 = NULL;
-
-       b43_destroy_dmaring(dma->tx_ring5);
-       dma->tx_ring5 = NULL;
-       b43_destroy_dmaring(dma->tx_ring4);
-       dma->tx_ring4 = NULL;
-       b43_destroy_dmaring(dma->tx_ring3);
-       dma->tx_ring3 = NULL;
-       b43_destroy_dmaring(dma->tx_ring2);
-       dma->tx_ring2 = NULL;
-       b43_destroy_dmaring(dma->tx_ring1);
-       dma->tx_ring1 = NULL;
-       b43_destroy_dmaring(dma->tx_ring0);
-       dma->tx_ring0 = NULL;
+       destroy_ring(dma, rx_ring);
+       destroy_ring(dma, tx_ring_AC_BK);
+       destroy_ring(dma, tx_ring_AC_BE);
+       destroy_ring(dma, tx_ring_AC_VI);
+       destroy_ring(dma, tx_ring_AC_VO);
+       destroy_ring(dma, tx_ring_mcast);
+}
+
+static int b43_dma_set_mask(struct b43_wldev *dev, u64 mask)
+{
+       u64 orig_mask = mask;
+       bool fallback = 0;
+       int err;
+
+       /* Try to set the DMA mask. If it fails, try falling back to a
+        * lower mask, as we can always also support a lower one. */
+       while (1) {
+               err = ssb_dma_set_mask(dev->dev, mask);
+               if (!err)
+                       break;
+               if (mask == DMA_BIT_MASK(64)) {
+                       mask = DMA_BIT_MASK(32);
+                       fallback = 1;
+                       continue;
+               }
+               if (mask == DMA_BIT_MASK(32)) {
+                       mask = DMA_BIT_MASK(30);
+                       fallback = 1;
+                       continue;
+               }
+               b43err(dev->wl, "The machine/kernel does not support "
+                      "the required %u-bit DMA mask\n",
+                      (unsigned int)dma_mask_to_engine_type(orig_mask));
+               return -EOPNOTSUPP;
+       }
+       if (fallback) {
+               b43info(dev->wl, "DMA mask fallback from %u-bit to %u-bit\n",
+                       (unsigned int)dma_mask_to_engine_type(orig_mask),
+                       (unsigned int)dma_mask_to_engine_type(mask));
+       }
+
+       return 0;
 }
 
 int b43_dma_init(struct b43_wldev *dev)
 {
        struct b43_dma *dma = &dev->dma;
-       struct b43_dmaring *ring;
        int err;
        u64 dmamask;
-       int dma64 = 0;
+       enum b43_dmatype type;
 
        dmamask = supported_dma_mask(dev);
-       if (dmamask == DMA_64BIT_MASK)
-               dma64 = 1;
-
-       err = ssb_dma_set_mask(dev->dev, dmamask);
-       if (err) {
-#ifdef B43_PIO
-               b43warn(dev->wl, "DMA for this device not supported. "
-                       "Falling back to PIO\n");
-               dev->__using_pio = 1;
-               return -EAGAIN;
-#else
-               b43err(dev->wl, "DMA for this device not supported and "
-                      "no PIO support compiled in\n");
-               return -EOPNOTSUPP;
-#endif
-       }
+       type = dma_mask_to_engine_type(dmamask);
+       err = b43_dma_set_mask(dev, dmamask);
+       if (err)
+               return err;
 
        err = -ENOMEM;
        /* setup TX DMA channels. */
-       ring = b43_setup_dmaring(dev, 0, 1, dma64);
-       if (!ring)
+       dma->tx_ring_AC_BK = b43_setup_dmaring(dev, 0, 1, type);
+       if (!dma->tx_ring_AC_BK)
                goto out;
-       dma->tx_ring0 = ring;
 
-       ring = b43_setup_dmaring(dev, 1, 1, dma64);
-       if (!ring)
-               goto err_destroy_tx0;
-       dma->tx_ring1 = ring;
+       dma->tx_ring_AC_BE = b43_setup_dmaring(dev, 1, 1, type);
+       if (!dma->tx_ring_AC_BE)
+               goto err_destroy_bk;
 
-       ring = b43_setup_dmaring(dev, 2, 1, dma64);
-       if (!ring)
-               goto err_destroy_tx1;
-       dma->tx_ring2 = ring;
+       dma->tx_ring_AC_VI = b43_setup_dmaring(dev, 2, 1, type);
+       if (!dma->tx_ring_AC_VI)
+               goto err_destroy_be;
 
-       ring = b43_setup_dmaring(dev, 3, 1, dma64);
-       if (!ring)
-               goto err_destroy_tx2;
-       dma->tx_ring3 = ring;
+       dma->tx_ring_AC_VO = b43_setup_dmaring(dev, 3, 1, type);
+       if (!dma->tx_ring_AC_VO)
+               goto err_destroy_vi;
 
-       ring = b43_setup_dmaring(dev, 4, 1, dma64);
-       if (!ring)
-               goto err_destroy_tx3;
-       dma->tx_ring4 = ring;
+       dma->tx_ring_mcast = b43_setup_dmaring(dev, 4, 1, type);
+       if (!dma->tx_ring_mcast)
+               goto err_destroy_vo;
 
-       ring = b43_setup_dmaring(dev, 5, 1, dma64);
-       if (!ring)
-               goto err_destroy_tx4;
-       dma->tx_ring5 = ring;
+       /* setup RX DMA channel. */
+       dma->rx_ring = b43_setup_dmaring(dev, 0, 0, type);
+       if (!dma->rx_ring)
+               goto err_destroy_mcast;
 
-       /* setup RX DMA channels. */
-       ring = b43_setup_dmaring(dev, 0, 0, dma64);
-       if (!ring)
-               goto err_destroy_tx5;
-       dma->rx_ring0 = ring;
-
-       if (dev->dev->id.revision < 5) {
-               ring = b43_setup_dmaring(dev, 3, 0, dma64);
-               if (!ring)
-                       goto err_destroy_rx0;
-               dma->rx_ring3 = ring;
-       }
+       /* No support for the TX status DMA ring. */
+       B43_WARN_ON(dev->dev->id.revision < 5);
 
-       b43dbg(dev->wl, "%d-bit DMA initialized\n",
-              (dmamask == DMA_64BIT_MASK) ? 64 :
-              (dmamask == DMA_32BIT_MASK) ? 32 : 30);
+       b43dbg(dev->wl, "%u-bit DMA initialized\n",
+              (unsigned int)type);
        err = 0;
-      out:
+out:
        return err;
 
-      err_destroy_rx0:
-       b43_destroy_dmaring(dma->rx_ring0);
-       dma->rx_ring0 = NULL;
-      err_destroy_tx5:
-       b43_destroy_dmaring(dma->tx_ring5);
-       dma->tx_ring5 = NULL;
-      err_destroy_tx4:
-       b43_destroy_dmaring(dma->tx_ring4);
-       dma->tx_ring4 = NULL;
-      err_destroy_tx3:
-       b43_destroy_dmaring(dma->tx_ring3);
-       dma->tx_ring3 = NULL;
-      err_destroy_tx2:
-       b43_destroy_dmaring(dma->tx_ring2);
-       dma->tx_ring2 = NULL;
-      err_destroy_tx1:
-       b43_destroy_dmaring(dma->tx_ring1);
-       dma->tx_ring1 = NULL;
-      err_destroy_tx0:
-       b43_destroy_dmaring(dma->tx_ring0);
-       dma->tx_ring0 = NULL;
-       goto out;
+err_destroy_mcast:
+       destroy_ring(dma, tx_ring_mcast);
+err_destroy_vo:
+       destroy_ring(dma, tx_ring_AC_VO);
+err_destroy_vi:
+       destroy_ring(dma, tx_ring_AC_VI);
+err_destroy_be:
+       destroy_ring(dma, tx_ring_AC_BE);
+err_destroy_bk:
+       destroy_ring(dma, tx_ring_AC_BK);
+       return err;
 }
 
 /* Generate a cookie for the TX header. */
 static u16 generate_cookie(struct b43_dmaring *ring, int slot)
 {
-       u16 cookie = 0x1000;
+       u16 cookie;
 
        /* Use the upper 4 bits of the cookie as
         * DMA controller ID and store the slot number
         * in the lower 12 bits.
         * Note that the cookie must never be 0, as this
         * is a special value used in RX path.
+        * It can also not be 0xFFFF because that is special
+        * for multicast frames.
         */
-       switch (ring->index) {
-       case 0:
-               cookie = 0xA000;
-               break;
-       case 1:
-               cookie = 0xB000;
-               break;
-       case 2:
-               cookie = 0xC000;
-               break;
-       case 3:
-               cookie = 0xD000;
-               break;
-       case 4:
-               cookie = 0xE000;
-               break;
-       case 5:
-               cookie = 0xF000;
-               break;
-       }
+       cookie = (((u16)ring->index + 1) << 12);
        B43_WARN_ON(slot & ~0x0FFF);
-       cookie |= (u16) slot;
+       cookie |= (u16)slot;
 
        return cookie;
 }
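
After this change the TX cookie is a plain packed 16-bit value: the ring number plus one in the top nibble (0x1 for AC_BK through 0x5 for the multicast ring) and the slot in the low 12 bits, so a valid cookie can be neither 0 nor 0xFFFF. A compact sketch of the encoding, using hypothetical helper names rather than the driver's functions:

/* Illustrative sketch only -- mirrors generate_cookie()/parse_cookie(). */
#include <assert.h>
#include <stdint.h>

static uint16_t pack_cookie(unsigned int ring_index, unsigned int slot)
{
	return (uint16_t)(((ring_index + 1) << 12) | (slot & 0x0FFF));
}

static void unpack_cookie(uint16_t cookie, unsigned int *ring_index,
			  unsigned int *slot)
{
	*ring_index = (cookie >> 12) - 1;
	*slot = cookie & 0x0FFF;
}

int main(void)
{
	unsigned int ring, slot;

	unpack_cookie(pack_cookie(4, 123), &ring, &slot);	/* mcast ring */
	assert(ring == 4 && slot == 123);
	return 0;
}
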
@@ -1087,107 +1134,128 @@ struct b43_dmaring *parse_cookie(struct b43_wldev *dev, u16 cookie, int *slot)
        struct b43_dmaring *ring = NULL;
 
        switch (cookie & 0xF000) {
-       case 0xA000:
-               ring = dma->tx_ring0;
+       case 0x1000:
+               ring = dma->tx_ring_AC_BK;
                break;
-       case 0xB000:
-               ring = dma->tx_ring1;
+       case 0x2000:
+               ring = dma->tx_ring_AC_BE;
                break;
-       case 0xC000:
-               ring = dma->tx_ring2;
+       case 0x3000:
+               ring = dma->tx_ring_AC_VI;
                break;
-       case 0xD000:
-               ring = dma->tx_ring3;
+       case 0x4000:
+               ring = dma->tx_ring_AC_VO;
                break;
-       case 0xE000:
-               ring = dma->tx_ring4;
+       case 0x5000:
+               ring = dma->tx_ring_mcast;
                break;
-       case 0xF000:
-               ring = dma->tx_ring5;
-               break;
-       default:
-               B43_WARN_ON(1);
        }
        *slot = (cookie & 0x0FFF);
-       B43_WARN_ON(!(ring && *slot >= 0 && *slot < ring->nr_slots));
+       if (unlikely(!ring || *slot < 0 || *slot >= ring->nr_slots)) {
+               b43dbg(dev->wl, "TX-status contains "
+                      "invalid cookie: 0x%04X\n", cookie);
+               return NULL;
+       }
 
        return ring;
 }
 
 static int dma_tx_fragment(struct b43_dmaring *ring,
-                          struct sk_buff *skb,
-                          struct ieee80211_tx_control *ctl)
+                          struct sk_buff *skb)
 {
        const struct b43_dma_ops *ops = ring->ops;
+       struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+       struct b43_private_tx_info *priv_info = b43_get_priv_tx_info(info);
        u8 *header;
-       int slot;
+       int slot, old_top_slot, old_used_slots;
        int err;
        struct b43_dmadesc_generic *desc;
        struct b43_dmadesc_meta *meta;
        struct b43_dmadesc_meta *meta_hdr;
-       struct sk_buff *bounce_skb;
+       u16 cookie;
+       size_t hdrsize = b43_txhdr_size(ring->dev);
+
+       /* Important note: If the number of used DMA slots per TX frame
+        * is changed here, the TX_SLOTS_PER_FRAME definition at the top of
+        * the file has to be updated, too!
+        */
 
-#define SLOTS_PER_PACKET  2
-       B43_WARN_ON(skb_shinfo(skb)->nr_frags);
+       old_top_slot = ring->current_slot;
+       old_used_slots = ring->used_slots;
 
        /* Get a slot for the header. */
        slot = request_slot(ring);
        desc = ops->idx2desc(ring, slot, &meta_hdr);
        memset(meta_hdr, 0, sizeof(*meta_hdr));
 
-       header = &(ring->txhdr_cache[slot * sizeof(struct b43_txhdr_fw4)]);
-       b43_generate_txhdr(ring->dev, header,
-                          skb->data, skb->len, ctl,
-                          generate_cookie(ring, slot));
+       header = &(ring->txhdr_cache[(slot / TX_SLOTS_PER_FRAME) * hdrsize]);
+       cookie = generate_cookie(ring, slot);
+       err = b43_generate_txhdr(ring->dev, header,
+                                skb, info, cookie);
+       if (unlikely(err)) {
+               ring->current_slot = old_top_slot;
+               ring->used_slots = old_used_slots;
+               return err;
+       }
 
        meta_hdr->dmaaddr = map_descbuffer(ring, (unsigned char *)header,
-                                          sizeof(struct b43_txhdr_fw4), 1);
-       if (dma_mapping_error(meta_hdr->dmaaddr))
+                                          hdrsize, 1);
+       if (b43_dma_mapping_error(ring, meta_hdr->dmaaddr, hdrsize, 1)) {
+               ring->current_slot = old_top_slot;
+               ring->used_slots = old_used_slots;
                return -EIO;
+       }
        ops->fill_descriptor(ring, desc, meta_hdr->dmaaddr,
-                            sizeof(struct b43_txhdr_fw4), 1, 0, 0);
+                            hdrsize, 1, 0, 0);
 
        /* Get a slot for the payload. */
        slot = request_slot(ring);
        desc = ops->idx2desc(ring, slot, &meta);
        memset(meta, 0, sizeof(*meta));
 
-       memcpy(&meta->txstat.control, ctl, sizeof(*ctl));
        meta->skb = skb;
        meta->is_last_fragment = 1;
+       priv_info->bouncebuffer = NULL;
 
        meta->dmaaddr = map_descbuffer(ring, skb->data, skb->len, 1);
        /* create a bounce buffer in zone_dma on mapping failure. */
-       if (dma_mapping_error(meta->dmaaddr)) {
-               bounce_skb = __dev_alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA);
-               if (!bounce_skb) {
+       if (b43_dma_mapping_error(ring, meta->dmaaddr, skb->len, 1)) {
+               priv_info->bouncebuffer = kmalloc(skb->len, GFP_ATOMIC | GFP_DMA);
+               if (!priv_info->bouncebuffer) {
+                       ring->current_slot = old_top_slot;
+                       ring->used_slots = old_used_slots;
                        err = -ENOMEM;
                        goto out_unmap_hdr;
                }
-
-               memcpy(skb_put(bounce_skb, skb->len), skb->data, skb->len);
-               dev_kfree_skb_any(skb);
-               skb = bounce_skb;
-               meta->skb = skb;
-               meta->dmaaddr = map_descbuffer(ring, skb->data, skb->len, 1);
-               if (dma_mapping_error(meta->dmaaddr)) {
+               memcpy(priv_info->bouncebuffer, skb->data, skb->len);
+
+               meta->dmaaddr = map_descbuffer(ring, priv_info->bouncebuffer, skb->len, 1);
+               if (b43_dma_mapping_error(ring, meta->dmaaddr, skb->len, 1)) {
+                       kfree(priv_info->bouncebuffer);
+                       priv_info->bouncebuffer = NULL;
+                       ring->current_slot = old_top_slot;
+                       ring->used_slots = old_used_slots;
                        err = -EIO;
-                       goto out_free_bounce;
+                       goto out_unmap_hdr;
                }
        }
 
        ops->fill_descriptor(ring, desc, meta->dmaaddr, skb->len, 0, 1, 1);
 
+       if (info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) {
+               /* Tell the firmware about the cookie of the last
+                * mcast frame, so it can clear the more-data bit in it. */
+               b43_shm_write16(ring->dev, B43_SHM_SHARED,
+                               B43_SHM_SH_MCASTCOOKIE, cookie);
+       }
        /* Now transfer the whole frame. */
        wmb();
        ops->poke_tx(ring, next_slot(ring, slot));
        return 0;
 
-      out_free_bounce:
-       dev_kfree_skb_any(skb);
-      out_unmap_hdr:
+out_unmap_hdr:
        unmap_descbuffer(ring, meta_hdr->dmaaddr,
-                        sizeof(struct b43_txhdr_fw4), 1);
+                        hdrsize, 1);
        return err;
 }
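
The bounce-buffer fallback above is a common pattern: when the skb's own data cannot be mapped inside the engine's DMA window, the payload is copied into a freshly allocated GFP_DMA buffer and that copy is mapped instead (and freed again in the TX-status path). A reduced userspace sketch of the control flow, with a hypothetical mapping check standing in for b43_dma_mapping_error():

/* Illustrative sketch only -- hypothetical helpers, not the driver's API. */
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* Stand-in for "does this buffer map inside the DMA window?" */
static bool mapping_ok(const void *buf)
{
	return ((uintptr_t)buf & 0x3) == 0;	/* arbitrary example rule */
}

/* Returns the pointer to hand to the hardware, or NULL on failure.
 * On the bounce path, *bounce is set so the caller can free it later. */
static void *map_or_bounce(void *buf, size_t len, void **bounce)
{
	*bounce = NULL;
	if (mapping_ok(buf))
		return buf;
	*bounce = malloc(len);
	if (!*bounce)
		return NULL;
	memcpy(*bounce, buf, len);
	if (!mapping_ok(*bounce)) {
		free(*bounce);
		*bounce = NULL;
		return NULL;
	}
	return *bounce;
}
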
 
@@ -1212,43 +1280,106 @@ static inline int should_inject_overflow(struct b43_dmaring *ring)
        return 0;
 }
 
-int b43_dma_tx(struct b43_wldev *dev,
-              struct sk_buff *skb, struct ieee80211_tx_control *ctl)
+/* Static mapping of mac80211's queues (priorities) to b43 DMA rings. */
+static struct b43_dmaring *select_ring_by_priority(struct b43_wldev *dev,
+                                                  u8 queue_prio)
 {
        struct b43_dmaring *ring;
+
+       if (dev->qos_enabled) {
+               /* 0 = highest priority */
+               switch (queue_prio) {
+               default:
+                       B43_WARN_ON(1);
+                       /* fallthrough */
+               case 0:
+                       ring = dev->dma.tx_ring_AC_VO;
+                       break;
+               case 1:
+                       ring = dev->dma.tx_ring_AC_VI;
+                       break;
+               case 2:
+                       ring = dev->dma.tx_ring_AC_BE;
+                       break;
+               case 3:
+                       ring = dev->dma.tx_ring_AC_BK;
+                       break;
+               }
+       } else
+               ring = dev->dma.tx_ring_AC_BE;
+
+       return ring;
+}
+
+int b43_dma_tx(struct b43_wldev *dev, struct sk_buff *skb)
+{
+       struct b43_dmaring *ring;
+       struct ieee80211_hdr *hdr;
        int err = 0;
-       unsigned long flags;
+       struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+
+       hdr = (struct ieee80211_hdr *)skb->data;
+       if (info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) {
+               /* The multicast ring will be sent after the DTIM */
+               ring = dev->dma.tx_ring_mcast;
+               /* Set the more-data bit. Ucode will clear it on
+                * the last frame for us. */
+               hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA);
+       } else {
+               /* Decide by priority where to put this frame. */
+               ring = select_ring_by_priority(
+                       dev, skb_get_queue_mapping(skb));
+       }
 
-       ring = priority_to_txring(dev, ctl->queue);
-       spin_lock_irqsave(&ring->lock, flags);
        B43_WARN_ON(!ring->tx);
-       if (unlikely(free_slots(ring) < SLOTS_PER_PACKET)) {
-               b43warn(dev->wl, "DMA queue overflow\n");
+
+       if (unlikely(ring->stopped)) {
+               /* We get here only because of a bug in mac80211.
+                * Because of a race, one packet may be queued after
+                * the queue is stopped, thus we got called when we shouldn't.
+                * For now, just refuse the transmit. */
+               if (b43_debug(dev, B43_DBG_DMAVERBOSE))
+                       b43err(dev->wl, "Packet after queue stopped\n");
                err = -ENOSPC;
-               goto out_unlock;
+               goto out;
        }
-       /* Check if the queue was stopped in mac80211,
-        * but we got called nevertheless.
-        * That would be a mac80211 bug. */
-       B43_WARN_ON(ring->stopped);
 
-       err = dma_tx_fragment(ring, skb, ctl);
+       if (unlikely(WARN_ON(free_slots(ring) < TX_SLOTS_PER_FRAME))) {
+               /* If we get here, we have a real error with the queue
+                * full, but queues not stopped. */
+               b43err(dev->wl, "DMA queue overflow\n");
+               err = -ENOSPC;
+               goto out;
+       }
+
+       /* Assign the queue number to the ring (if not already done before)
+        * so TX status handling can use it. The queue to ring mapping is
+        * static, so we don't need to store it per frame. */
+       ring->queue_prio = skb_get_queue_mapping(skb);
+
+       err = dma_tx_fragment(ring, skb);
+       if (unlikely(err == -ENOKEY)) {
+               /* Drop this packet, as we don't have the encryption key
+                * anymore and must not transmit it unencrypted. */
+               dev_kfree_skb_any(skb);
+               err = 0;
+               goto out;
+       }
        if (unlikely(err)) {
                b43err(dev->wl, "DMA tx mapping failure\n");
-               goto out_unlock;
+               goto out;
        }
        ring->nr_tx_packets++;
-       if ((free_slots(ring) < SLOTS_PER_PACKET) ||
+       if ((free_slots(ring) < TX_SLOTS_PER_FRAME) ||
            should_inject_overflow(ring)) {
                /* This TX ring is full. */
-               ieee80211_stop_queue(dev->wl->hw, txring_to_priority(ring));
+               ieee80211_stop_queue(dev->wl->hw, skb_get_queue_mapping(skb));
                ring->stopped = 1;
                if (b43_debug(dev, B43_DBG_DMAVERBOSE)) {
                        b43dbg(dev->wl, "Stopped TX ring %d\n", ring->index);
                }
        }
-      out_unlock:
-       spin_unlock_irqrestore(&ring->lock, flags);
+out:
 
        return err;
 }
@@ -1260,74 +1391,113 @@ void b43_dma_handle_txstatus(struct b43_wldev *dev,
        struct b43_dmaring *ring;
        struct b43_dmadesc_generic *desc;
        struct b43_dmadesc_meta *meta;
-       int slot;
+       int slot, firstused;
+       bool frame_succeed;
 
        ring = parse_cookie(dev, status->cookie, &slot);
        if (unlikely(!ring))
                return;
-       B43_WARN_ON(!irqs_disabled());
-       spin_lock(&ring->lock);
-
        B43_WARN_ON(!ring->tx);
+
+       /* Sanity check: TX packets are processed in-order on one ring.
+        * Check if the slot deduced from the cookie really is the first
+        * used slot. */
+       firstused = ring->current_slot - ring->used_slots + 1;
+       if (firstused < 0)
+               firstused = ring->nr_slots + firstused;
+       if (unlikely(slot != firstused)) {
+               /* This possibly is a firmware bug and will result in
+                * malfunction, memory leaks and/or stall of DMA functionality. */
+               b43dbg(dev->wl, "Out of order TX status report on DMA ring %d. "
+                      "Expected %d, but got %d\n",
+                      ring->index, firstused, slot);
+               return;
+       }
+
        ops = ring->ops;
        while (1) {
-               B43_WARN_ON(!(slot >= 0 && slot < ring->nr_slots));
+               B43_WARN_ON(slot < 0 || slot >= ring->nr_slots);
                desc = ops->idx2desc(ring, slot, &meta);
 
-               if (meta->skb)
-                       unmap_descbuffer(ring, meta->dmaaddr, meta->skb->len,
-                                        1);
-               else
+               if (b43_dma_ptr_is_poisoned(meta->skb)) {
+                       b43dbg(dev->wl, "Poisoned TX slot %d (first=%d) "
+                              "on ring %d\n",
+                              slot, firstused, ring->index);
+                       break;
+               }
+               if (meta->skb) {
+                       struct b43_private_tx_info *priv_info =
+                               b43_get_priv_tx_info(IEEE80211_SKB_CB(meta->skb));
+
+                       unmap_descbuffer(ring, meta->dmaaddr, meta->skb->len, 1);
+                       kfree(priv_info->bouncebuffer);
+                       priv_info->bouncebuffer = NULL;
+               } else {
                        unmap_descbuffer(ring, meta->dmaaddr,
-                                        sizeof(struct b43_txhdr_fw4), 1);
+                                        b43_txhdr_size(dev), 1);
+               }
 
                if (meta->is_last_fragment) {
-                       B43_WARN_ON(!meta->skb);
-                       /* Call back to inform the ieee80211 subsystem about the
-                        * status of the transmission.
-                        * Some fields of txstat are already filled in dma_tx().
-                        */
-                       if (status->acked) {
-                               meta->txstat.flags |= IEEE80211_TX_STATUS_ACK;
-                       } else {
-                               if (!(meta->txstat.control.flags
-                                     & IEEE80211_TXCTL_NO_ACK))
-                                       meta->txstat.excessive_retries = 1;
+                       struct ieee80211_tx_info *info;
+
+                       if (unlikely(!meta->skb)) {
+                               /* This is a scatter-gather fragment of a frame, so
+                                * the skb pointer must not be NULL. */
+                               b43dbg(dev->wl, "TX status unexpected NULL skb "
+                                      "at slot %d (first=%d) on ring %d\n",
+                                      slot, firstused, ring->index);
+                               break;
                        }
-                       if (status->frame_count == 0) {
-                               /* The frame was not transmitted at all. */
-                               meta->txstat.retry_count = 0;
-                       } else
-                               meta->txstat.retry_count = status->frame_count - 1;
-                       ieee80211_tx_status_irqsafe(dev->wl->hw, meta->skb,
-                                                   &(meta->txstat));
-                       /* skb is freed by ieee80211_tx_status_irqsafe() */
-                       meta->skb = NULL;
+
+                       info = IEEE80211_SKB_CB(meta->skb);
+
+                       /*
+                        * Call back to inform the ieee80211 subsystem about
+                        * the status of the transmission.
+                        */
+                       frame_succeed = b43_fill_txstatus_report(dev, info, status);
+#ifdef CONFIG_B43_DEBUG
+                       if (frame_succeed)
+                               ring->nr_succeed_tx_packets++;
+                       else
+                               ring->nr_failed_tx_packets++;
+                       ring->nr_total_packet_tries += status->frame_count;
+#endif /* DEBUG */
+                       ieee80211_tx_status(dev->wl->hw, meta->skb);
+
+                       /* skb will be freed by ieee80211_tx_status().
+                        * Poison our pointer. */
+                       meta->skb = B43_DMA_PTR_POISON;
                } else {
                        /* No need to call free_descriptor_buffer here, as
                         * this is only the txhdr, which is not allocated.
                         */
-                       B43_WARN_ON(meta->skb);
+                       if (unlikely(meta->skb)) {
+                               b43dbg(dev->wl, "TX status unexpected non-NULL skb "
+                                      "at slot %d (first=%d) on ring %d\n",
+                                      slot, firstused, ring->index);
+                               break;
+                       }
                }
 
                /* Everything unmapped and free'd. So it's not used anymore. */
                ring->used_slots--;
 
-               if (meta->is_last_fragment)
+               if (meta->is_last_fragment) {
+                       /* This is the last scatter-gather
+                        * fragment of the frame. We are done. */
                        break;
+               }
                slot = next_slot(ring, slot);
        }
-       dev->stats.last_tx = jiffies;
        if (ring->stopped) {
-               B43_WARN_ON(free_slots(ring) < SLOTS_PER_PACKET);
-               ieee80211_wake_queue(dev->wl->hw, txring_to_priority(ring));
+               B43_WARN_ON(free_slots(ring) < TX_SLOTS_PER_FRAME);
+               ieee80211_wake_queue(dev->wl->hw, ring->queue_prio);
                ring->stopped = 0;
                if (b43_debug(dev, B43_DBG_DMAVERBOSE)) {
                        b43dbg(dev->wl, "Woke up TX ring %d\n", ring->index);
                }
        }
-
-       spin_unlock(&ring->lock);
 }
 
 void b43_dma_get_tx_stats(struct b43_wldev *dev,
@@ -1335,19 +1505,14 @@ void b43_dma_get_tx_stats(struct b43_wldev *dev,
 {
        const int nr_queues = dev->wl->hw->queues;
        struct b43_dmaring *ring;
-       struct ieee80211_tx_queue_stats_data *data;
-       unsigned long flags;
        int i;
 
        for (i = 0; i < nr_queues; i++) {
-               data = &(stats->data[i]);
-               ring = priority_to_txring(dev, i);
-
-               spin_lock_irqsave(&ring->lock, flags);
-               data->len = ring->used_slots / SLOTS_PER_PACKET;
-               data->limit = ring->nr_slots / SLOTS_PER_PACKET;
-               data->count = ring->nr_tx_packets;
-               spin_unlock_irqrestore(&ring->lock, flags);
+               ring = select_ring_by_priority(dev, i);
+
+               stats[i].len = ring->used_slots / TX_SLOTS_PER_FRAME;
+               stats[i].limit = ring->nr_slots / TX_SLOTS_PER_FRAME;
+               stats[i].count = ring->nr_tx_packets;
        }
 }
 
@@ -1367,25 +1532,6 @@ static void dma_rx(struct b43_dmaring *ring, int *slot)
        sync_descbuffer_for_cpu(ring, meta->dmaaddr, ring->rx_buffersize);
        skb = meta->skb;
 
-       if (ring->index == 3) {
-               /* We received an xmit status. */
-               struct b43_hwtxstatus *hw = (struct b43_hwtxstatus *)skb->data;
-               int i = 0;
-
-               while (hw->cookie == 0) {
-                       if (i > 100)
-                               break;
-                       i++;
-                       udelay(2);
-                       barrier();
-               }
-               b43_handle_hwtxstatus(ring->dev, hw);
-               /* recycle the descriptor buffer. */
-               sync_descbuffer_for_device(ring, meta->dmaaddr,
-                                          ring->rx_buffersize);
-
-               return;
-       }
        rxhdr = (struct b43_rxhdr_fw4 *)skb->data;
        len = le16_to_cpu(rxhdr->frame_len);
        if (len == 0) {
@@ -1397,12 +1543,17 @@ static void dma_rx(struct b43_dmaring *ring, int *slot)
                        len = le16_to_cpu(rxhdr->frame_len);
                } while (len == 0 && i++ < 5);
                if (unlikely(len == 0)) {
-                       /* recycle the descriptor buffer. */
-                       sync_descbuffer_for_device(ring, meta->dmaaddr,
-                                                  ring->rx_buffersize);
-                       goto drop;
+                       dmaaddr = meta->dmaaddr;
+                       goto drop_recycle_buffer;
                }
        }
+       if (unlikely(b43_rx_buffer_is_poisoned(ring, skb))) {
+               /* Something went wrong with the DMA.
+                * The device did not touch the buffer and did not overwrite the poison. */
+               b43dbg(ring->dev->wl, "DMA RX: Dropping poisoned buffer.\n");
+               dmaaddr = meta->dmaaddr;
+               goto drop_recycle_buffer;
+       }
        if (unlikely(len > ring->rx_buffersize)) {
                /* The data did not fit into one descriptor buffer
                 * and is split over multiple buffers.
@@ -1415,6 +1566,7 @@ static void dma_rx(struct b43_dmaring *ring, int *slot)
                while (1) {
                        desc = ops->idx2desc(ring, *slot, &meta);
                        /* recycle the descriptor buffer. */
+                       b43_poison_rx_buffer(ring, meta->skb);
                        sync_descbuffer_for_device(ring, meta->dmaaddr,
                                                   ring->rx_buffersize);
                        *slot = next_slot(ring, *slot);
@@ -1433,8 +1585,7 @@ static void dma_rx(struct b43_dmaring *ring, int *slot)
        err = setup_rx_descbuffer(ring, desc, meta, GFP_ATOMIC);
        if (unlikely(err)) {
                b43dbg(ring->dev->wl, "DMA RX: setup_rx_descbuffer() failed\n");
-               sync_descbuffer_for_device(ring, dmaaddr, ring->rx_buffersize);
-               goto drop;
+               goto drop_recycle_buffer;
        }
 
        unmap_descbuffer(ring, dmaaddr, ring->rx_buffersize, 0);
@@ -1442,8 +1593,13 @@ static void dma_rx(struct b43_dmaring *ring, int *slot)
        skb_pull(skb, ring->frameoffset);
 
        b43_rx(ring->dev, skb, rxhdr);
-      drop:
+drop:
        return;
+
+drop_recycle_buffer:
+       /* Poison and recycle the RX buffer. */
+       b43_poison_rx_buffer(ring, skb);
+       sync_descbuffer_for_device(ring, dmaaddr, ring->rx_buffersize);
 }
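
The poisoning added above works as a cheap DMA-failure detector: before a buffer is handed to the device, its length field is zeroed and the first eight frame bytes are overwritten with 0xFF; if those bytes are still all 0xFF when the descriptor completes, the hardware never wrote into the buffer and the frame is dropped. A minimal sketch of the check, with a hypothetical buffer layout:

/* Illustrative sketch only -- hypothetical layout, not dma.c code. */
#include <assert.h>
#include <stdbool.h>
#include <string.h>

#define POISON_LEN 8

static void poison(unsigned char *frame)
{
	memset(frame, 0xFF, POISON_LEN);
}

static bool still_poisoned(const unsigned char *frame)
{
	unsigned char acc = 0xFF;
	int i;

	for (i = 0; i < POISON_LEN; i++)
		acc &= frame[i];
	return acc == 0xFF;	/* the device never touched the buffer */
}

int main(void)
{
	unsigned char frame[POISON_LEN];

	poison(frame);
	assert(still_poisoned(frame));
	frame[0] = 0x08;	/* pretend the device stored a data frame */
	assert(!still_poisoned(frame));
	return 0;
}
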
 
 void b43_dma_rx(struct b43_dmaring *ring)
@@ -1467,42 +1623,68 @@ void b43_dma_rx(struct b43_dmaring *ring)
 
 static void b43_dma_tx_suspend_ring(struct b43_dmaring *ring)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&ring->lock, flags);
        B43_WARN_ON(!ring->tx);
        ring->ops->tx_suspend(ring);
-       spin_unlock_irqrestore(&ring->lock, flags);
 }
 
 static void b43_dma_tx_resume_ring(struct b43_dmaring *ring)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&ring->lock, flags);
        B43_WARN_ON(!ring->tx);
        ring->ops->tx_resume(ring);
-       spin_unlock_irqrestore(&ring->lock, flags);
 }
 
 void b43_dma_tx_suspend(struct b43_wldev *dev)
 {
        b43_power_saving_ctl_bits(dev, B43_PS_AWAKE);
-       b43_dma_tx_suspend_ring(dev->dma.tx_ring0);
-       b43_dma_tx_suspend_ring(dev->dma.tx_ring1);
-       b43_dma_tx_suspend_ring(dev->dma.tx_ring2);
-       b43_dma_tx_suspend_ring(dev->dma.tx_ring3);
-       b43_dma_tx_suspend_ring(dev->dma.tx_ring4);
-       b43_dma_tx_suspend_ring(dev->dma.tx_ring5);
+       b43_dma_tx_suspend_ring(dev->dma.tx_ring_AC_BK);
+       b43_dma_tx_suspend_ring(dev->dma.tx_ring_AC_BE);
+       b43_dma_tx_suspend_ring(dev->dma.tx_ring_AC_VI);
+       b43_dma_tx_suspend_ring(dev->dma.tx_ring_AC_VO);
+       b43_dma_tx_suspend_ring(dev->dma.tx_ring_mcast);
 }
 
 void b43_dma_tx_resume(struct b43_wldev *dev)
 {
-       b43_dma_tx_resume_ring(dev->dma.tx_ring5);
-       b43_dma_tx_resume_ring(dev->dma.tx_ring4);
-       b43_dma_tx_resume_ring(dev->dma.tx_ring3);
-       b43_dma_tx_resume_ring(dev->dma.tx_ring2);
-       b43_dma_tx_resume_ring(dev->dma.tx_ring1);
-       b43_dma_tx_resume_ring(dev->dma.tx_ring0);
+       b43_dma_tx_resume_ring(dev->dma.tx_ring_mcast);
+       b43_dma_tx_resume_ring(dev->dma.tx_ring_AC_VO);
+       b43_dma_tx_resume_ring(dev->dma.tx_ring_AC_VI);
+       b43_dma_tx_resume_ring(dev->dma.tx_ring_AC_BE);
+       b43_dma_tx_resume_ring(dev->dma.tx_ring_AC_BK);
        b43_power_saving_ctl_bits(dev, 0);
 }
+
+#ifdef CONFIG_B43_PIO
+static void direct_fifo_rx(struct b43_wldev *dev, enum b43_dmatype type,
+                          u16 mmio_base, bool enable)
+{
+       u32 ctl;
+
+       if (type == B43_DMA_64BIT) {
+               ctl = b43_read32(dev, mmio_base + B43_DMA64_RXCTL);
+               ctl &= ~B43_DMA64_RXDIRECTFIFO;
+               if (enable)
+                       ctl |= B43_DMA64_RXDIRECTFIFO;
+               b43_write32(dev, mmio_base + B43_DMA64_RXCTL, ctl);
+       } else {
+               ctl = b43_read32(dev, mmio_base + B43_DMA32_RXCTL);
+               ctl &= ~B43_DMA32_RXDIRECTFIFO;
+               if (enable)
+                       ctl |= B43_DMA32_RXDIRECTFIFO;
+               b43_write32(dev, mmio_base + B43_DMA32_RXCTL, ctl);
+       }
+}
+
+/* Enable/Disable Direct FIFO Receive Mode (PIO) on a RX engine.
+ * This is called from PIO code, so DMA structures are not available. */
+void b43_dma_direct_fifo_rx(struct b43_wldev *dev,
+                           unsigned int engine_index, bool enable)
+{
+       enum b43_dmatype type;
+       u16 mmio_base;
+
+       type = dma_mask_to_engine_type(supported_dma_mask(dev));
+
+       mmio_base = b43_dmacontroller_base(type, engine_index);
+       direct_fifo_rx(dev, type, mmio_base, enable);
+}
+#endif /* CONFIG_B43_PIO */