Merge branch 'for-next' into for-linus
[safe/jmp/linux-2.6] / drivers / net / wireless / ath / ar9170 / main.c
index c54c42e..08dc42d 100644 (file)
@@ -49,6 +49,10 @@ static int modparam_nohwcrypt;
 module_param_named(nohwcrypt, modparam_nohwcrypt, bool, S_IRUGO);
 MODULE_PARM_DESC(nohwcrypt, "Disable hardware encryption.");
 
+static int modparam_ht;
+module_param_named(ht, modparam_ht, bool, S_IRUGO);
+MODULE_PARM_DESC(ht, "enable MPDU aggregation.");
+
 #define RATE(_bitrate, _hw_rate, _txpidx, _flags) {    \
        .bitrate        = (_bitrate),                   \
        .flags          = (_flags),                     \
@@ -146,15 +150,17 @@ static struct ieee80211_channel ar9170_5ghz_chantable[] = {
 {                                                                      \
        .ht_supported   = true,                                         \
        .cap            = IEEE80211_HT_CAP_MAX_AMSDU |                  \
-                         IEEE80211_HT_CAP_SM_PS |                      \
                          IEEE80211_HT_CAP_SUP_WIDTH_20_40 |            \
                          IEEE80211_HT_CAP_SGI_40 |                     \
+                         IEEE80211_HT_CAP_GRN_FLD |                    \
                          IEEE80211_HT_CAP_DSSSCCK40 |                  \
                          IEEE80211_HT_CAP_SM_PS,                       \
        .ampdu_factor   = 3,                                            \
        .ampdu_density  = 6,                                            \
        .mcs            = {                                             \
-               .rx_mask = { 0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, },     \
+               .rx_mask = { 0xff, 0xff, 0, 0, 0x1, 0, 0, 0, 0, 0, },   \
+               .rx_highest = cpu_to_le16(300),                         \
+               .tx_params = IEEE80211_HT_MCS_TX_DEFINED,               \
        },                                                              \
 }
 
@@ -174,59 +180,159 @@ static struct ieee80211_supported_band ar9170_band_5GHz = {
        .ht_cap         = AR9170_HT_CAP,
 };
 
-#ifdef AR9170_QUEUE_DEBUG
-/*
- * In case some wants works with AR9170's crazy tx_status queueing techniques.
- * He might need this rather useful probing function.
- *
- * NOTE: caller must hold the queue's spinlock!
- */
+static void ar9170_tx(struct ar9170 *ar);
+static bool ar9170_tx_ampdu(struct ar9170 *ar);
+
+static inline u16 ar9170_get_seq_h(struct ieee80211_hdr *hdr)
+{      /* Sequence number: seq_ctrl in CPU order, low 4 bits shifted out. */
+       return le16_to_cpu(hdr->seq_ctrl) >> 4;
+}
+
+static inline u16 ar9170_get_seq(struct sk_buff *skb)
+{      /* As ar9170_get_seq_h(), but skb->data is the ar9170_tx_control. */
+       struct ar9170_tx_control *txc = (void *) skb->data;
+       return ar9170_get_seq_h((void *) txc->frame_data);
+}
+
+static inline u16 ar9170_get_tid_h(struct ieee80211_hdr *hdr)
+{      /* TID bits of QoS-control byte 0; presumably needs a QoS @hdr. */
+       return (ieee80211_get_qos_ctl(hdr))[0] & IEEE80211_QOS_CTL_TID_MASK;
+}
+
+static inline u16 ar9170_get_tid(struct sk_buff *skb)
+{      /* As ar9170_get_tid_h(), but skb->data is the ar9170_tx_control. */
+       struct ar9170_tx_control *txc = (void *) skb->data;
+       return ar9170_get_tid_h((struct ieee80211_hdr *) txc->frame_data);
+}
+
+#define GET_NEXT_SEQ(seq)      (((seq) + 1) & 0x0fff)  /* wraps at 12 bits */
+#define GET_NEXT_SEQ_FROM_SKB(skb)     (GET_NEXT_SEQ(ar9170_get_seq(skb)))
 
+#if (defined AR9170_QUEUE_DEBUG) || (defined AR9170_TXAGG_DEBUG)
 static void ar9170_print_txheader(struct ar9170 *ar, struct sk_buff *skb)
 {
        struct ar9170_tx_control *txc = (void *) skb->data;
-       struct ieee80211_hdr *hdr = (void *)txc->frame_data;
+       struct ieee80211_tx_info *txinfo = IEEE80211_SKB_CB(skb);
+       struct ar9170_tx_info *arinfo = (void *) txinfo->rate_driver_data;
+       struct ieee80211_hdr *hdr = (void *) txc->frame_data;
 
-       printk(KERN_DEBUG "%s: => FRAME [skb:%p, queue:%d, DA:[%pM] "
-                         "mac_control:%04x, phy_control:%08x]\n",
+       printk(KERN_DEBUG "%s: => FRAME [skb:%p, q:%d, DA:[%pM] s:%d "
+                         "mac_ctrl:%04x, phy_ctrl:%08x, timeout:[%d ms]]\n",
               wiphy_name(ar->hw->wiphy), skb, skb_get_queue_mapping(skb),
-              ieee80211_get_DA(hdr), le16_to_cpu(txc->mac_control),
-              le32_to_cpu(txc->phy_control));
+              ieee80211_get_DA(hdr), ar9170_get_seq_h(hdr),
+              le16_to_cpu(txc->mac_control), le32_to_cpu(txc->phy_control),
+              jiffies_to_msecs(arinfo->timeout - jiffies));
 }
 
-static void ar9170_dump_station_tx_status_queue(struct ar9170 *ar,
-                                               struct sk_buff_head *queue)
+static void __ar9170_dump_txqueue(struct ar9170 *ar,
+                               struct sk_buff_head *queue)
 {
        struct sk_buff *skb;
        int i = 0;
 
        printk(KERN_DEBUG "---[ cut here ]---\n");
-       printk(KERN_DEBUG "%s: %d entries in tx_status queue.\n",
+       printk(KERN_DEBUG "%s: %d entries in queue.\n",
               wiphy_name(ar->hw->wiphy), skb_queue_len(queue));
 
        skb_queue_walk(queue, skb) {
-               struct ar9170_tx_control *txc = (void *) skb->data;
-               struct ieee80211_hdr *hdr = (void *)txc->frame_data;
-
-               printk(KERN_DEBUG "index:%d => \n", i);
+               printk(KERN_DEBUG "index:%d => \n", i++);
                ar9170_print_txheader(ar, skb);
        }
+       if (i != skb_queue_len(queue))
+               printk(KERN_DEBUG "WARNING: queue frame counter "
+                      "mismatch %d != %d\n", skb_queue_len(queue), i);
        printk(KERN_DEBUG "---[ end ]---\n");
 }
+#endif /* AR9170_QUEUE_DEBUG || AR9170_TXAGG_DEBUG */
+
+#ifdef AR9170_QUEUE_DEBUG
+static void ar9170_dump_txqueue(struct ar9170 *ar,
+                               struct sk_buff_head *queue)
+{      /* Locked wrapper: dump @queue with queue->lock held (irqsave). */
+       unsigned long flags;
+
+       spin_lock_irqsave(&queue->lock, flags);
+       __ar9170_dump_txqueue(ar, queue);
+       spin_unlock_irqrestore(&queue->lock, flags);
+}
 #endif /* AR9170_QUEUE_DEBUG */
 
-void ar9170_handle_tx_status(struct ar9170 *ar, struct sk_buff *skb,
-                            bool valid_status, u16 tx_status)
+#ifdef AR9170_QUEUE_STOP_DEBUG
+static void __ar9170_dump_txstats(struct ar9170 *ar)
+{      /* Log limit, len, waitack and stopped state of each tx queue. */
+       int i;
+
+       printk(KERN_DEBUG "%s: QoS queue stats\n",
+              wiphy_name(ar->hw->wiphy));
+
+       for (i = 0; i < __AR9170_NUM_TXQ; i++)
+               printk(KERN_DEBUG "%s: queue:%d limit:%d len:%d waitack:%d "
+                      " stopped:%d\n", wiphy_name(ar->hw->wiphy), i,
+                      ar->tx_stats[i].limit, ar->tx_stats[i].len,
+                      skb_queue_len(&ar->tx_status[i]), /* "waitack" */
+                      ieee80211_queue_stopped(ar->hw, i));
+}
+#endif /* AR9170_QUEUE_STOP_DEBUG */
+
+#ifdef AR9170_TXAGG_DEBUG
+static void ar9170_dump_tx_status_ampdu(struct ar9170 *ar)
 {
-       struct ieee80211_tx_info *txinfo;
-       unsigned int retries = 0, queue = skb_get_queue_mapping(skb);
        unsigned long flags;
 
-       spin_lock_irqsave(&ar->tx_stats_lock, flags);
-       ar->tx_stats[queue].len--;
-       if (ieee80211_queue_stopped(ar->hw, queue))
-               ieee80211_wake_queue(ar->hw, queue);
-       spin_unlock_irqrestore(&ar->tx_stats_lock, flags);
+       spin_lock_irqsave(&ar->tx_status_ampdu.lock, flags);
+       printk(KERN_DEBUG "%s: A-MPDU tx_status queue => \n",
+              wiphy_name(ar->hw->wiphy));
+       __ar9170_dump_txqueue(ar, &ar->tx_status_ampdu);
+       spin_unlock_irqrestore(&ar->tx_status_ampdu.lock, flags);
+}
+
+#endif /* AR9170_TXAGG_DEBUG */
+
+/* caller must guarantee exclusive access for _bin_ queue. */
+static void ar9170_recycle_expired(struct ar9170 *ar,
+                                  struct sk_buff_head *queue,
+                                  struct sk_buff_head *bin)
+{      /* Move timed-out frames from the head of @queue to the tail of @bin. */
+       struct sk_buff *skb, *old = NULL;
+       unsigned long flags;
+
+       spin_lock_irqsave(&queue->lock, flags);
+       while ((skb = skb_peek(queue))) {
+               struct ieee80211_tx_info *txinfo;
+               struct ar9170_tx_info *arinfo;
+
+               txinfo = IEEE80211_SKB_CB(skb);
+               arinfo = (void *) txinfo->rate_driver_data;
+
+               if (time_is_before_jiffies(arinfo->timeout)) {
+#ifdef AR9170_QUEUE_DEBUG
+                       printk(KERN_DEBUG "%s: [%ld > %ld] frame expired => "
+                              "recycle \n", wiphy_name(ar->hw->wiphy),
+                              jiffies, arinfo->timeout);
+                       ar9170_print_txheader(ar, skb);
+#endif /* AR9170_QUEUE_DEBUG */
+                       __skb_unlink(skb, queue);
+                       __skb_queue_tail(bin, skb);
+               } else {
+                       break;  /* stop at the first frame not yet expired */
+               }
+
+               if (unlikely(old == skb)) {
+                       /* bail out - queue is shot. */
+                       /* skb_peek() returned the frame unlinked just before */
+                       WARN_ON(1);
+                       break;
+               }
+               old = skb;
+       }
+       spin_unlock_irqrestore(&queue->lock, flags);
+}
+
+static void ar9170_tx_status(struct ar9170 *ar, struct sk_buff *skb,
+                                   u16 tx_status)
+{
+       struct ieee80211_tx_info *txinfo;
+       unsigned int retries = 0;
 
        txinfo = IEEE80211_SKB_CB(skb);
        ieee80211_tx_info_clear_status(txinfo);
@@ -248,45 +354,120 @@ void ar9170_handle_tx_status(struct ar9170 *ar, struct sk_buff *skb,
                break;
        }
 
-       if (valid_status)
-               txinfo->status.rates[0].count = retries + 1;
-
+       txinfo->status.rates[0].count = retries + 1;
        skb_pull(skb, sizeof(struct ar9170_tx_control));
        ieee80211_tx_status_irqsafe(ar->hw, skb);
 }
 
-static struct sk_buff *ar9170_find_skb_in_queue(struct ar9170 *ar,
-                                               const u8 *mac,
-                                               const u32 queue,
-                                               struct sk_buff_head *q)
+static void ar9170_tx_fake_ampdu_status(struct ar9170 *ar)
 {
-       unsigned long flags;
+       struct sk_buff_head success;
        struct sk_buff *skb;
+       unsigned int i;
+       unsigned long queue_bitmap = 0;
 
-       spin_lock_irqsave(&q->lock, flags);
-       skb_queue_walk(q, skb) {
-               struct ar9170_tx_control *txc = (void *) skb->data;
-               struct ieee80211_hdr *hdr = (void *) txc->frame_data;
-               u32 txc_queue = (le32_to_cpu(txc->phy_control) &
-                               AR9170_TX_PHY_QOS_MASK) >>
-                               AR9170_TX_PHY_QOS_SHIFT;
+       skb_queue_head_init(&success);
 
-               if  ((queue != txc_queue) ||
-                    (compare_ether_addr(ieee80211_get_DA(hdr), mac)))
-                       continue;
+       while (skb_queue_len(&ar->tx_status_ampdu) > AR9170_NUM_TX_STATUS)
+               __skb_queue_tail(&success, skb_dequeue(&ar->tx_status_ampdu));
 
-               __skb_unlink(skb, q);
-               spin_unlock_irqrestore(&q->lock, flags);
-               return skb;
+       ar9170_recycle_expired(ar, &ar->tx_status_ampdu, &success);
+
+#ifdef AR9170_TXAGG_DEBUG
+       printk(KERN_DEBUG "%s: collected %d A-MPDU frames.\n",
+              wiphy_name(ar->hw->wiphy), skb_queue_len(&success));
+       __ar9170_dump_txqueue(ar, &success);
+#endif /* AR9170_TXAGG_DEBUG */
+
+       while ((skb = __skb_dequeue(&success))) {
+               struct ieee80211_tx_info *txinfo;
+
+               queue_bitmap |= BIT(skb_get_queue_mapping(skb));
+
+               txinfo = IEEE80211_SKB_CB(skb);
+               ieee80211_tx_info_clear_status(txinfo);
+
+               txinfo->flags |= IEEE80211_TX_STAT_ACK;
+               txinfo->status.rates[0].count = 1;
+
+               skb_pull(skb, sizeof(struct ar9170_tx_control));
+               ieee80211_tx_status_irqsafe(ar->hw, skb);
+       }
+
+       for_each_set_bit(i, &queue_bitmap, BITS_PER_BYTE) {
+#ifdef AR9170_QUEUE_STOP_DEBUG
+               printk(KERN_DEBUG "%s: wake queue %d\n",
+                      wiphy_name(ar->hw->wiphy), i);
+               __ar9170_dump_txstats(ar);
+#endif /* AR9170_QUEUE_STOP_DEBUG */
+               ieee80211_wake_queue(ar->hw, i);
+       }
+
+       if (queue_bitmap)
+               ar9170_tx(ar);
+}
+
+static void ar9170_tx_ampdu_callback(struct ar9170 *ar, struct sk_buff *skb)
+{      /* Queue @skb on tx_status_ampdu, expiring AR9170_BA_TIMEOUT ms on. */
+       struct ieee80211_tx_info *txinfo = IEEE80211_SKB_CB(skb);
+       struct ar9170_tx_info *arinfo = (void *) txinfo->rate_driver_data;
+
+       arinfo->timeout = jiffies +
+                         msecs_to_jiffies(AR9170_BA_TIMEOUT);
+
+       skb_queue_tail(&ar->tx_status_ampdu, skb);
+       ar9170_tx_fake_ampdu_status(ar);
+       /* pending count just hit zero while tx_ampdu_list still has entries */
+       if (atomic_dec_and_test(&ar->tx_ampdu_pending) &&
+           !list_empty(&ar->tx_ampdu_list))
+               ar9170_tx_ampdu(ar);
+}
+
+void ar9170_tx_callback(struct ar9170 *ar, struct sk_buff *skb)
+{
+       struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+       struct ar9170_tx_info *arinfo = (void *) info->rate_driver_data;
+       unsigned int queue = skb_get_queue_mapping(skb);
+       unsigned long flags;
+
+       spin_lock_irqsave(&ar->tx_stats_lock, flags);
+       ar->tx_stats[queue].len--;
+
+       if (ar->tx_stats[queue].len < AR9170_NUM_TX_LIMIT_SOFT) {
+#ifdef AR9170_QUEUE_STOP_DEBUG
+               printk(KERN_DEBUG "%s: wake queue %d\n",
+                      wiphy_name(ar->hw->wiphy), queue);
+               __ar9170_dump_txstats(ar);
+#endif /* AR9170_QUEUE_STOP_DEBUG */
+               ieee80211_wake_queue(ar->hw, queue);
+       }
+       spin_unlock_irqrestore(&ar->tx_stats_lock, flags);
+
+       if (info->flags & IEEE80211_TX_CTL_NO_ACK) {
+               ar9170_tx_status(ar, skb, AR9170_TX_STATUS_FAILED);
+       } else {
+               if (info->flags & IEEE80211_TX_CTL_AMPDU) {
+                       ar9170_tx_ampdu_callback(ar, skb);
+               } else {
+                       arinfo->timeout = jiffies +
+                                 msecs_to_jiffies(AR9170_TX_TIMEOUT);
+
+                       skb_queue_tail(&ar->tx_status[queue], skb);
+               }
+       }
+
+       if (!ar->tx_stats[queue].len &&
+           !skb_queue_empty(&ar->tx_pending[queue])) {
+               ar9170_tx(ar);
        }
-       spin_unlock_irqrestore(&q->lock, flags);
-       return NULL;
 }
 
-static struct sk_buff *ar9170_find_queued_skb(struct ar9170 *ar, const u8 *mac,
-                                             const u32 queue)
+static struct sk_buff *ar9170_get_queued_skb(struct ar9170 *ar,
+                                            const u8 *mac,
+                                            struct sk_buff_head *queue,
+                                            const u32 rate)
 {
-       struct ieee80211_sta *sta;
+       unsigned long flags;
        struct sk_buff *skb;
 
        /*
@@ -297,85 +478,130 @@ static struct sk_buff *ar9170_find_queued_skb(struct ar9170 *ar, const u8 *mac,
         * the firmware provided (-> destination MAC, and phy_control) -
         * and hope that we picked the right one...
         */
-       rcu_read_lock();
-       sta = ieee80211_find_sta(ar->hw, mac);
-
-       if (likely(sta)) {
-               struct ar9170_sta_info *sta_priv = (void *) sta->drv_priv;
-               skb = skb_dequeue(&sta_priv->tx_status[queue]);
-               rcu_read_unlock();
-               if (likely(skb))
-                       return skb;
-       } else
-               rcu_read_unlock();
-
-       /* scan the waste queue for candidates */
-       skb = ar9170_find_skb_in_queue(ar, mac, queue,
-                                      &ar->global_tx_status_waste);
-       if (!skb) {
-               /* so it still _must_ be in the global list. */
-               skb = ar9170_find_skb_in_queue(ar, mac, queue,
-                                              &ar->global_tx_status);
-       }
 
+       spin_lock_irqsave(&queue->lock, flags);
+       skb_queue_walk(queue, skb) {
+               struct ar9170_tx_control *txc = (void *) skb->data;
+               struct ieee80211_hdr *hdr = (void *) txc->frame_data;
+               u32 r;
+
+               if (mac && compare_ether_addr(ieee80211_get_DA(hdr), mac)) {
+#ifdef AR9170_QUEUE_DEBUG
+                       printk(KERN_DEBUG "%s: skip frame => DA %pM != %pM\n",
+                              wiphy_name(ar->hw->wiphy), mac,
+                              ieee80211_get_DA(hdr));
+                       ar9170_print_txheader(ar, skb);
+#endif /* AR9170_QUEUE_DEBUG */
+                       continue;
+               }
+
+               r = (le32_to_cpu(txc->phy_control) & AR9170_TX_PHY_MCS_MASK) >>
+                   AR9170_TX_PHY_MCS_SHIFT;
+
+               if ((rate != AR9170_TX_INVALID_RATE) && (r != rate)) {
 #ifdef AR9170_QUEUE_DEBUG
-       if (unlikely((!skb) && net_ratelimit())) {
-               printk(KERN_ERR "%s: ESS:[%pM] does not have any "
-                               "outstanding frames in this queue (%d).\n",
-                               wiphy_name(ar->hw->wiphy), mac, queue);
+                       printk(KERN_DEBUG "%s: skip frame => rate %d != %d\n",
+                              wiphy_name(ar->hw->wiphy), rate, r);
+                       ar9170_print_txheader(ar, skb);
+#endif /* AR9170_QUEUE_DEBUG */
+                       continue;
+               }
+
+               __skb_unlink(skb, queue);
+               spin_unlock_irqrestore(&queue->lock, flags);
+               return skb;
        }
+
+#ifdef AR9170_QUEUE_DEBUG
+       printk(KERN_ERR "%s: ESS:[%pM] does not have any "
+                       "outstanding frames in queue.\n",
+                       wiphy_name(ar->hw->wiphy), mac);
+       __ar9170_dump_txqueue(ar, queue);
 #endif /* AR9170_QUEUE_DEBUG */
-       return skb;
+       spin_unlock_irqrestore(&queue->lock, flags);
+
+       return NULL;
+}
+
+static void ar9170_handle_block_ack(struct ar9170 *ar, u16 count, u16 r)
+{      /* Hand back up to @count A-MPDU frames of rate @r; sets no ACK flag. */
+       struct sk_buff *skb;
+       struct ieee80211_tx_info *txinfo;
+
+       while (count) {
+               skb = ar9170_get_queued_skb(ar, NULL, &ar->tx_status_ampdu, r);
+               if (!skb)
+                       break;  /* nothing (more) queued for this rate */
+
+               txinfo = IEEE80211_SKB_CB(skb);
+               ieee80211_tx_info_clear_status(txinfo);
+
+               /* FIXME: maybe more ? */
+               txinfo->status.rates[0].count = 1;
+               /* strip the ar9170_tx_control header before reporting */
+               skb_pull(skb, sizeof(struct ar9170_tx_control));
+               ieee80211_tx_status_irqsafe(ar->hw, skb);
+               count--;
+       }
+
+#ifdef AR9170_TXAGG_DEBUG
+       if (count) {
+               printk(KERN_DEBUG "%s: got %d more failed mpdus, but no more "
+                      "suitable frames left in tx_status queue.\n",
+                      wiphy_name(ar->hw->wiphy), count);
+
+               ar9170_dump_tx_status_ampdu(ar);
+       }
+#endif /* AR9170_TXAGG_DEBUG */
 }
 
 /*
- * This worker tries to keep the global tx_status queue empty.
- * So we can guarantee that incoming tx_status reports for
- * unregistered stations are always synced with the actual
- * frame - which we think - belongs to.
+ * This worker tries to keep and maintain the tx_status queues.
+ * So we can guarantee that incoming tx_status reports are
+ * actually for a pending frame.
  */
 
-static void ar9170_tx_status_janitor(struct work_struct *work)
+static void ar9170_tx_janitor(struct work_struct *work)
 {
        struct ar9170 *ar = container_of(work, struct ar9170,
-                                        tx_status_janitor.work);
-       struct sk_buff *skb;
+                                        tx_janitor.work);
+       struct sk_buff_head waste;
+       unsigned int i;
+       bool resched = false;
 
        if (unlikely(!IS_STARTED(ar)))
                return ;
 
-       mutex_lock(&ar->mutex);
-       /* recycle the garbage back to mac80211... one by one. */
-       while ((skb = skb_dequeue(&ar->global_tx_status_waste))) {
+       skb_queue_head_init(&waste);
+
+       for (i = 0; i < __AR9170_NUM_TXQ; i++) {
 #ifdef AR9170_QUEUE_DEBUG
-               printk(KERN_DEBUG "%s: dispose queued frame =>\n",
-                      wiphy_name(ar->hw->wiphy));
-               ar9170_print_txheader(ar, skb);
+               printk(KERN_DEBUG "%s: garbage collector scans queue:%d\n",
+                      wiphy_name(ar->hw->wiphy), i);
+               ar9170_dump_txqueue(ar, &ar->tx_pending[i]);
+               ar9170_dump_txqueue(ar, &ar->tx_status[i]);
 #endif /* AR9170_QUEUE_DEBUG */
-               ar9170_handle_tx_status(ar, skb, false,
-                                       AR9170_TX_STATUS_FAILED);
-       }
 
-       while ((skb = skb_dequeue(&ar->global_tx_status))) {
-#ifdef AR9170_QUEUE_DEBUG
-               printk(KERN_DEBUG "%s: moving frame into waste queue =>\n",
-                      wiphy_name(ar->hw->wiphy));
+               ar9170_recycle_expired(ar, &ar->tx_status[i], &waste);
+               ar9170_recycle_expired(ar, &ar->tx_pending[i], &waste);
+               skb_queue_purge(&waste);
 
-               ar9170_print_txheader(ar, skb);
-#endif /* AR9170_QUEUE_DEBUG */
-               skb_queue_tail(&ar->global_tx_status_waste, skb);
+               if (!skb_queue_empty(&ar->tx_status[i]) ||
+                   !skb_queue_empty(&ar->tx_pending[i]))
+                       resched = true;
        }
 
-       /* recall the janitor in 100ms - if there's garbage in the can. */
-       if (skb_queue_len(&ar->global_tx_status_waste) > 0)
-               queue_delayed_work(ar->hw->workqueue, &ar->tx_status_janitor,
-                                  msecs_to_jiffies(100));
+       ar9170_tx_fake_ampdu_status(ar);
 
-       mutex_unlock(&ar->mutex);
+       if (!resched)
+               return;
+
+       ieee80211_queue_delayed_work(ar->hw,
+                                    &ar->tx_janitor,
+                                    msecs_to_jiffies(AR9170_JANITOR_DELAY));
 }
 
-static void ar9170_handle_command_response(struct ar9170 *ar,
-                                          void *buf, u32 len)
+void ar9170_handle_command_response(struct ar9170 *ar, void *buf, u32 len)
 {
        struct ar9170_cmd_response *cmd = (void *) buf;
 
@@ -399,15 +625,21 @@ static void ar9170_handle_command_response(struct ar9170 *ar,
                 */
 
                struct sk_buff *skb;
-               u32 queue = (le32_to_cpu(cmd->tx_status.rate) &
-                           AR9170_TX_PHY_QOS_MASK) >> AR9170_TX_PHY_QOS_SHIFT;
+               u32 phy = le32_to_cpu(cmd->tx_status.rate);
+               u32 q = (phy & AR9170_TX_PHY_QOS_MASK) >>
+                       AR9170_TX_PHY_QOS_SHIFT;
+#ifdef AR9170_QUEUE_DEBUG
+               printk(KERN_DEBUG "%s: recv tx_status for %pM, p:%08x, q:%d\n",
+                      wiphy_name(ar->hw->wiphy), cmd->tx_status.dst, phy, q);
+#endif /* AR9170_QUEUE_DEBUG */
 
-               skb = ar9170_find_queued_skb(ar, cmd->tx_status.dst, queue);
+               skb = ar9170_get_queued_skb(ar, cmd->tx_status.dst,
+                                           &ar->tx_status[q],
+                                           AR9170_TX_INVALID_RATE);
                if (unlikely(!skb))
                        return ;
 
-               ar9170_handle_tx_status(ar, skb, true,
-                                       le16_to_cpu(cmd->tx_status.status));
+               ar9170_tx_status(ar, skb, le16_to_cpu(cmd->tx_status.status));
                break;
                }
 
@@ -416,7 +648,7 @@ static void ar9170_handle_command_response(struct ar9170 *ar,
                 * pre-TBTT event
                 */
                if (ar->vif && ar->vif->type == NL80211_IFTYPE_AP)
-                       queue_work(ar->hw->workqueue, &ar->beacon_work);
+                       ieee80211_queue_work(ar->hw, &ar->beacon_work);
                break;
 
        case 0xc2:
@@ -435,8 +667,15 @@ static void ar9170_handle_command_response(struct ar9170 *ar,
                break;
 
        case 0xc4:
+               /* BlockACK bitmap */
+               break;
+
        case 0xc5:
                /* BlockACK events */
+               ar9170_handle_block_ack(ar,
+                                       le16_to_cpu(cmd->ba_fail_cnt.failed),
+                                       le16_to_cpu(cmd->ba_fail_cnt.rate));
+               ar9170_tx_fake_ampdu_status(ar);
                break;
 
        case 0xc6:
@@ -447,6 +686,38 @@ static void ar9170_handle_command_response(struct ar9170 *ar,
                /* retransmission issue / SIFS/EIFS collision ?! */
                break;
 
+       /* firmware debug */
+       case 0xca:
+               printk(KERN_DEBUG "ar9170 FW: %.*s\n", len - 4, (char *)buf + 4);
+               break;
+       case 0xcb:
+               len -= 4;
+
+               switch (len) {
+               case 1:
+                       printk(KERN_DEBUG "ar9170 FW: u8: %#.2x\n",
+                               *((char *)buf + 4));
+                       break;
+               case 2:
+                       printk(KERN_DEBUG "ar9170 FW: u8: %#.4x\n",
+                               le16_to_cpup((__le16 *)((char *)buf + 4)));
+                       break;
+               case 4:
+                       printk(KERN_DEBUG "ar9170 FW: u8: %#.8x\n",
+                               le32_to_cpup((__le32 *)((char *)buf + 4)));
+                       break;
+               case 8:
+                       printk(KERN_DEBUG "ar9170 FW: u8: %#.16lx\n",
+                               (unsigned long)le64_to_cpup(
+                                               (__le64 *)((char *)buf + 4)));
+                       break;
+               }
+               break;
+       case 0xcc:
+               print_hex_dump_bytes("ar9170 FW:", DUMP_PREFIX_NONE,
+                                    (char *)buf + 4, len - 4);
+               break;
+
        default:
                printk(KERN_INFO "received unhandled event %x\n", cmd->type);
                print_hex_dump_bytes("dump:", DUMP_PREFIX_NONE, buf, len);
@@ -460,7 +731,7 @@ static void ar9170_rx_reset_rx_mpdu(struct ar9170 *ar)
        ar->rx_mpdu.has_plcp = false;
 }
 
-static int ar9170_nag_limiter(struct ar9170 *ar)
+int ar9170_nag_limiter(struct ar9170 *ar)
 {
        bool print_message;
 
@@ -577,6 +848,7 @@ static int ar9170_rx_mac_status(struct ar9170 *ar,
                }
                break;
 
+       case AR9170_RX_STATUS_MODULATION_DUPOFDM:
        case AR9170_RX_STATUS_MODULATION_OFDM:
                switch (head->plcp[0] & 0xf) {
                case 0xb:
@@ -624,8 +896,7 @@ static int ar9170_rx_mac_status(struct ar9170 *ar,
                status->flag |= RX_FLAG_HT;
                break;
 
-       case AR9170_RX_STATUS_MODULATION_DUPOFDM:
-               /* XXX */
+       default:
                if (ar9170_nag_limiter(ar))
                        printk(KERN_ERR "%s: invalid modulation\n",
                               wiphy_name(ar->hw->wiphy));
@@ -792,8 +1063,10 @@ static void ar9170_handle_mpdu(struct ar9170 *ar, u8 *buf, int len)
                ar9170_rx_phy_status(ar, phy, &status);
 
        skb = ar9170_rx_copy_data(buf, mpdu_len);
-       if (likely(skb))
-               ieee80211_rx_irqsafe(ar->hw, skb, &status);
+       if (likely(skb)) {
+               memcpy(IEEE80211_SKB_RXCB(skb), &status, sizeof(status));
+               ieee80211_rx_irqsafe(ar->hw, skb);
+       }
 }
 
 void ar9170_rx(struct ar9170 *ar, struct sk_buff *skb)
@@ -959,8 +1232,8 @@ static int ar9170_op_start(struct ieee80211_hw *hw)
 
        /* reinitialize queues statistics */
        memset(&ar->tx_stats, 0, sizeof(ar->tx_stats));
-       for (i = 0; i < ARRAY_SIZE(ar->tx_stats); i++)
-               ar->tx_stats[i].limit = 8;
+       for (i = 0; i < __AR9170_NUM_TXQ; i++)
+               ar->tx_stats[i].limit = AR9170_TXQ_DEPTH;
 
        /* reset QoS defaults */
        AR9170_FILL_QUEUE(ar->edcf[0], 3, 15, 1023,  0); /* BEST EFFORT*/
@@ -969,6 +1242,11 @@ static int ar9170_op_start(struct ieee80211_hw *hw)
        AR9170_FILL_QUEUE(ar->edcf[3], 2, 3,     7, 47); /* VOICE */
        AR9170_FILL_QUEUE(ar->edcf[4], 2, 3,     7,  0); /* SPECIAL */
 
+       /* set sane AMPDU defaults */
+       ar->global_ampdu_density = 6;
+       ar->global_ampdu_factor = 3;
+
+       atomic_set(&ar->tx_ampdu_pending, 0);
        ar->bad_hw_nagger = jiffies;
 
        err = ar->open(ar);
@@ -1006,18 +1284,18 @@ out:
 static void ar9170_op_stop(struct ieee80211_hw *hw)
 {
        struct ar9170 *ar = hw->priv;
+       unsigned int i;
 
        if (IS_STARTED(ar))
                ar->state = AR9170_IDLE;
 
-       flush_workqueue(ar->hw->workqueue);
+       cancel_delayed_work_sync(&ar->tx_janitor);
+#ifdef CONFIG_AR9170_LEDS
+       cancel_delayed_work_sync(&ar->led_work);
+#endif
+       cancel_work_sync(&ar->beacon_work);
 
        mutex_lock(&ar->mutex);
-       cancel_delayed_work_sync(&ar->tx_status_janitor);
-       cancel_work_sync(&ar->filter_config_work);
-       cancel_work_sync(&ar->beacon_work);
-       skb_queue_purge(&ar->global_tx_status_waste);
-       skb_queue_purge(&ar->global_tx_status);
 
        if (IS_ACCEPTING_CMD(ar)) {
                ar9170_set_leds_state(ar, 0);
@@ -1027,51 +1305,63 @@ static void ar9170_op_stop(struct ieee80211_hw *hw)
                ar->stop(ar);
        }
 
+       for (i = 0; i < __AR9170_NUM_TXQ; i++) {
+               skb_queue_purge(&ar->tx_pending[i]);
+               skb_queue_purge(&ar->tx_status[i]);
+       }
+       skb_queue_purge(&ar->tx_status_ampdu);
+
        mutex_unlock(&ar->mutex);
 }
 
-int ar9170_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
+static void ar9170_tx_indicate_immba(struct ar9170 *ar, struct sk_buff *skb)
+{      /* Set AR9170_TX_MAC_IMM_AMPDU in @skb's tx header; @ar is unused. */
+       struct ar9170_tx_control *txc = (void *) skb->data;
+
+       txc->mac_control |= cpu_to_le16(AR9170_TX_MAC_IMM_AMPDU);
+}
+
+/*
+ * Copy phy_control and driver_rates from @src to @dst (@ar is unused).
+ */
+static void ar9170_tx_copy_phy(struct ar9170 *ar, struct sk_buff *dst,
+                              struct sk_buff *src)
+{
+       struct ar9170_tx_control *dst_txc, *src_txc;
+       struct ieee80211_tx_info *dst_info, *src_info;
+
+       src_txc = (void *) src->data;
+       src_info = IEEE80211_SKB_CB(src);
+
+       dst_txc = (void *) dst->data;
+       dst_info = IEEE80211_SKB_CB(dst);
+
+       dst_txc->phy_control = src_txc->phy_control;
+
+       /* same MCS for the whole aggregate */
+       memcpy(dst_info->driver_rates, src_info->driver_rates,
+              sizeof(dst_info->driver_rates));
+}
+
+static int ar9170_tx_prepare(struct ar9170 *ar, struct sk_buff *skb)
 {
-       struct ar9170 *ar = hw->priv;
        struct ieee80211_hdr *hdr;
        struct ar9170_tx_control *txc;
        struct ieee80211_tx_info *info;
-       struct ieee80211_rate *rate = NULL;
        struct ieee80211_tx_rate *txrate;
+       struct ar9170_tx_info *arinfo;
        unsigned int queue = skb_get_queue_mapping(skb);
-       unsigned long flags = 0;
-       struct ar9170_sta_info *sta_info = NULL;
-       u32 power, chains;
        u16 keytype = 0;
        u16 len, icv = 0;
-       int err;
-       bool tx_status;
 
-       if (unlikely(!IS_STARTED(ar)))
-               goto err_free;
+       BUILD_BUG_ON(sizeof(*arinfo) > sizeof(info->rate_driver_data));
 
        hdr = (void *)skb->data;
        info = IEEE80211_SKB_CB(skb);
        len = skb->len;
 
-       spin_lock_irqsave(&ar->tx_stats_lock, flags);
-       if (ar->tx_stats[queue].limit < ar->tx_stats[queue].len) {
-               spin_unlock_irqrestore(&ar->tx_stats_lock, flags);
-               return NETDEV_TX_OK;
-       }
-
-       ar->tx_stats[queue].len++;
-       ar->tx_stats[queue].count++;
-       if (ar->tx_stats[queue].limit == ar->tx_stats[queue].len)
-               ieee80211_stop_queue(hw, queue);
-
-       spin_unlock_irqrestore(&ar->tx_stats_lock, flags);
-
        txc = (void *)skb_push(skb, sizeof(*txc));
 
-       tx_status = (((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) != 0) ||
-                   ((info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) != 0));
-
        if (info->control.hw_key) {
                icv = info->control.hw_key->icv_len;
 
@@ -1087,7 +1377,7 @@ int ar9170_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
                        break;
                default:
                        WARN_ON(1);
-                       goto err_dequeue;
+                       goto err_out;
                }
        }
 
@@ -1104,16 +1394,60 @@ int ar9170_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
        if (info->flags & IEEE80211_TX_CTL_NO_ACK)
                txc->mac_control |= cpu_to_le16(AR9170_TX_MAC_NO_ACK);
 
-       if (info->flags & IEEE80211_TX_CTL_AMPDU)
-               txc->mac_control |= cpu_to_le16(AR9170_TX_MAC_AGGR);
-
        txrate = &info->control.rates[0];
-
        if (txrate->flags & IEEE80211_TX_RC_USE_CTS_PROTECT)
                txc->mac_control |= cpu_to_le16(AR9170_TX_MAC_PROT_CTS);
        else if (txrate->flags & IEEE80211_TX_RC_USE_RTS_CTS)
                txc->mac_control |= cpu_to_le16(AR9170_TX_MAC_PROT_RTS);
 
+       arinfo = (void *)info->rate_driver_data;
+       arinfo->timeout = jiffies + msecs_to_jiffies(AR9170_QUEUE_TIMEOUT);
+
+       if (!(info->flags & IEEE80211_TX_CTL_NO_ACK) &&
+            (is_valid_ether_addr(ieee80211_get_DA(hdr)))) {
+               /*
+                * WARNING:
+                * Putting the QoS queue bits into an unexplored territory is
+                * certainly not elegant.
+                *
+                * In my defense: This idea provides a reasonable way to
+                * smuggle valuable information to the tx_status callback.
+                * Also, the idea behind this bit-abuse came straight from
+                * the original driver code.
+                */
+
+               txc->phy_control |=
+                       cpu_to_le32(queue << AR9170_TX_PHY_QOS_SHIFT);
+
+               if (info->flags & IEEE80211_TX_CTL_AMPDU) {
+                       if (unlikely(!info->control.sta))
+                               goto err_out;
+
+                       txc->mac_control |= cpu_to_le16(AR9170_TX_MAC_AGGR);
+               } else {
+                       txc->mac_control |= cpu_to_le16(AR9170_TX_MAC_RATE_PROBE);
+               }
+       }
+
+       return 0;
+
+err_out:
+       skb_pull(skb, sizeof(*txc));
+       return -EINVAL;
+}
+
+static void ar9170_tx_prepare_phy(struct ar9170 *ar, struct sk_buff *skb)
+{
+       struct ar9170_tx_control *txc;
+       struct ieee80211_tx_info *info;
+       struct ieee80211_rate *rate = NULL;
+       struct ieee80211_tx_rate *txrate;
+       u32 power, chains;
+
+       txc = (void *) skb->data;
+       info = IEEE80211_SKB_CB(skb);
+       txrate = &info->control.rates[0];
+
        if (txrate->flags & IEEE80211_TX_RC_GREEN_FIELD)
                txc->phy_control |= cpu_to_le32(AR9170_TX_PHY_GREENFIELD);
 
@@ -1133,9 +1467,12 @@ int ar9170_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
                u32 r = txrate->idx;
                u8 *txpower;
 
+               /* heavy clip control */
+               txc->phy_control |= cpu_to_le32((r & 0x7) << 7);
+
                r <<= AR9170_TX_PHY_MCS_SHIFT;
-               if (WARN_ON(r & ~AR9170_TX_PHY_MCS_MASK))
-                       goto err_dequeue;
+               BUG_ON(r & ~AR9170_TX_PHY_MCS_MASK);
+
                txc->phy_control |= cpu_to_le32(r & AR9170_TX_PHY_MCS_MASK);
                txc->phy_control |= cpu_to_le32(AR9170_TX_PHY_MOD_HT);
 
@@ -1197,60 +1534,415 @@ int ar9170_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
                        chains = AR9170_TX_PHY_TXCHAIN_1;
        }
        txc->phy_control |= cpu_to_le32(chains << AR9170_TX_PHY_TXCHAIN_SHIFT);
+}
+
+static bool ar9170_tx_ampdu(struct ar9170 *ar)
+{
+       struct sk_buff_head agg;
+       struct ar9170_sta_tid *tid_info = NULL, *tmp;
+       struct sk_buff *skb, *first = NULL;
+       unsigned long flags, f2;
+       unsigned int i = 0;
+       u16 seq, queue, tmpssn;
+       bool run = false;
+
+       skb_queue_head_init(&agg);
+
+       spin_lock_irqsave(&ar->tx_ampdu_list_lock, flags);
+       if (list_empty(&ar->tx_ampdu_list)) {
+#ifdef AR9170_TXAGG_DEBUG
+               printk(KERN_DEBUG "%s: aggregation list is empty.\n",
+                      wiphy_name(ar->hw->wiphy));
+#endif /* AR9170_TXAGG_DEBUG */
+               goto out_unlock;
+       }
+
+       list_for_each_entry_safe(tid_info, tmp, &ar->tx_ampdu_list, list) {
+               if (tid_info->state != AR9170_TID_STATE_COMPLETE) {
+#ifdef AR9170_TXAGG_DEBUG
+                       printk(KERN_DEBUG "%s: dangling aggregation entry!\n",
+                              wiphy_name(ar->hw->wiphy));
+#endif /* AR9170_TXAGG_DEBUG */
+                       continue;
+               }
+
+               if (++i > 64) {
+#ifdef AR9170_TXAGG_DEBUG
+                       printk(KERN_DEBUG "%s: enough frames aggregated.\n",
+                              wiphy_name(ar->hw->wiphy));
+#endif /* AR9170_TXAGG_DEBUG */
+                       break;
+               }
+
+               queue = TID_TO_WME_AC(tid_info->tid);
+
+               if (skb_queue_len(&ar->tx_pending[queue]) >=
+                   AR9170_NUM_TX_AGG_MAX) {
+#ifdef AR9170_TXAGG_DEBUG
+                       printk(KERN_DEBUG "%s: queue %d full.\n",
+                              wiphy_name(ar->hw->wiphy), queue);
+#endif /* AR9170_TXAGG_DEBUG */
+                       continue;
+               }
+
+               list_del_init(&tid_info->list);
+
+               spin_lock_irqsave(&tid_info->queue.lock, f2);
+               tmpssn = seq = tid_info->ssn;
+               first = skb_peek(&tid_info->queue);
+
+               if (likely(first))
+                       tmpssn = ar9170_get_seq(first);
+
+               if (unlikely(tmpssn != seq)) {
+#ifdef AR9170_TXAGG_DEBUG
+                       printk(KERN_DEBUG "%s: ssn mismatch [%d != %d]\n.",
+                              wiphy_name(ar->hw->wiphy), seq, tmpssn);
+#endif /* AR9170_TXAGG_DEBUG */
+                       tid_info->ssn = tmpssn;
+               }
+
+#ifdef AR9170_TXAGG_DEBUG
+               printk(KERN_DEBUG "%s: generate A-MPDU for tid:%d ssn:%d with "
+                      "%d queued frames.\n", wiphy_name(ar->hw->wiphy),
+                      tid_info->tid, tid_info->ssn,
+                      skb_queue_len(&tid_info->queue));
+               __ar9170_dump_txqueue(ar, &tid_info->queue);
+#endif /* AR9170_TXAGG_DEBUG */
+
+               while ((skb = skb_peek(&tid_info->queue))) {
+                       if (unlikely(ar9170_get_seq(skb) != seq))
+                               break;
+
+                       __skb_unlink(skb, &tid_info->queue);
+                       tid_info->ssn = seq = GET_NEXT_SEQ(seq);
+
+                       if (unlikely(skb_get_queue_mapping(skb) != queue)) {
+#ifdef AR9170_TXAGG_DEBUG
+                               printk(KERN_DEBUG "%s: tid:%d(q:%d) queue:%d "
+                                      "!match.\n", wiphy_name(ar->hw->wiphy),
+                                      tid_info->tid,
+                                      TID_TO_WME_AC(tid_info->tid),
+                                      skb_get_queue_mapping(skb));
+#endif /* AR9170_TXAGG_DEBUG */
+                                       dev_kfree_skb_any(skb);
+                                       continue;
+                       }
+
+                       if (unlikely(first == skb)) {
+                               ar9170_tx_prepare_phy(ar, skb);
+                               __skb_queue_tail(&agg, skb);
+                               first = skb;
+                       } else {
+                               ar9170_tx_copy_phy(ar, skb, first);
+                               __skb_queue_tail(&agg, skb);
+                       }
+
+                       if (unlikely(skb_queue_len(&agg) ==
+                           AR9170_NUM_TX_AGG_MAX))
+                               break;
+               }
+
+               if (skb_queue_empty(&tid_info->queue))
+                       tid_info->active = false;
+               else
+                       list_add_tail(&tid_info->list,
+                                     &ar->tx_ampdu_list);
+
+               spin_unlock_irqrestore(&tid_info->queue.lock, f2);
+
+               if (unlikely(skb_queue_empty(&agg))) {
+#ifdef AR9170_TXAGG_DEBUG
+                       printk(KERN_DEBUG "%s: queued empty list!\n",
+                              wiphy_name(ar->hw->wiphy));
+#endif /* AR9170_TXAGG_DEBUG */
+                       continue;
+               }
 
-       if (tx_status) {
-               txc->mac_control |= cpu_to_le16(AR9170_TX_MAC_RATE_PROBE);
                /*
-                * WARNING:
-                * Putting the QoS queue bits into an unexplored territory is
-                * certainly not elegant.
-                *
-                * In my defense: This idea provides a reasonable way to
-                * smuggle valuable information to the tx_status callback.
-                * Also, the idea behind this bit-abuse came straight from
-                * the original driver code.
+                * Tell the FW/HW that this is the last frame of the
+                * aggregate, so that it waits for the immediate block ack.
                 */
+               ar9170_tx_indicate_immba(ar, skb_peek_tail(&agg));
 
-               txc->phy_control |=
-                       cpu_to_le32(queue << AR9170_TX_PHY_QOS_SHIFT);
+#ifdef AR9170_TXAGG_DEBUG
+               printk(KERN_DEBUG "%s: generated A-MPDU looks like this:\n",
+                      wiphy_name(ar->hw->wiphy));
+               __ar9170_dump_txqueue(ar, &agg);
+#endif /* AR9170_TXAGG_DEBUG */
 
-               if (info->control.sta) {
-                       sta_info = (void *) info->control.sta->drv_priv;
-                       skb_queue_tail(&sta_info->tx_status[queue], skb);
-               } else {
-                       skb_queue_tail(&ar->global_tx_status, skb);
+               spin_unlock_irqrestore(&ar->tx_ampdu_list_lock, flags);
+
+               spin_lock_irqsave(&ar->tx_pending[queue].lock, flags);
+               skb_queue_splice_tail_init(&agg, &ar->tx_pending[queue]);
+               spin_unlock_irqrestore(&ar->tx_pending[queue].lock, flags);
+               run = true;
+
+               spin_lock_irqsave(&ar->tx_ampdu_list_lock, flags);
+       }
+
+out_unlock:
+       spin_unlock_irqrestore(&ar->tx_ampdu_list_lock, flags);
+       __skb_queue_purge(&agg);
+
+       return run;
+}
+
+static void ar9170_tx(struct ar9170 *ar)
+{
+       struct sk_buff *skb;
+       unsigned long flags;
+       struct ieee80211_tx_info *info;
+       struct ar9170_tx_info *arinfo;
+       unsigned int i, frames, frames_failed, remaining_space;
+       int err;
+       bool schedule_garbagecollector = false;
+
+       BUILD_BUG_ON(sizeof(*arinfo) > sizeof(info->rate_driver_data));
+
+       if (unlikely(!IS_STARTED(ar)))
+               return ;
+
+       remaining_space = AR9170_TX_MAX_PENDING;
+
+       for (i = 0; i < __AR9170_NUM_TXQ; i++) {
+               spin_lock_irqsave(&ar->tx_stats_lock, flags);
+               frames = min(ar->tx_stats[i].limit - ar->tx_stats[i].len,
+                            skb_queue_len(&ar->tx_pending[i]));
+
+               if (remaining_space < frames) {
+#ifdef AR9170_QUEUE_DEBUG
+                       printk(KERN_DEBUG "%s: tx quota reached queue:%d, "
+                              "remaining slots:%d, needed:%d\n",
+                              wiphy_name(ar->hw->wiphy), i, remaining_space,
+                              frames);
+#endif /* AR9170_QUEUE_DEBUG */
+                       frames = remaining_space;
+               }
+
+               ar->tx_stats[i].len += frames;
+               ar->tx_stats[i].count += frames;
+               if (ar->tx_stats[i].len >= ar->tx_stats[i].limit) {
+#ifdef AR9170_QUEUE_DEBUG
+                       printk(KERN_DEBUG "%s: queue %d full\n",
+                              wiphy_name(ar->hw->wiphy), i);
+
+                       printk(KERN_DEBUG "%s: stuck frames: ===> \n",
+                              wiphy_name(ar->hw->wiphy));
+                       ar9170_dump_txqueue(ar, &ar->tx_pending[i]);
+                       ar9170_dump_txqueue(ar, &ar->tx_status[i]);
+#endif /* AR9170_QUEUE_DEBUG */
+
+#ifdef AR9170_QUEUE_STOP_DEBUG
+                       printk(KERN_DEBUG "%s: stop queue %d\n",
+                              wiphy_name(ar->hw->wiphy), i);
+                       __ar9170_dump_txstats(ar);
+#endif /* AR9170_QUEUE_STOP_DEBUG */
+                       ieee80211_stop_queue(ar->hw, i);
+               }
+
+               spin_unlock_irqrestore(&ar->tx_stats_lock, flags);
+
+               if (!frames)
+                       continue;
+
+               frames_failed = 0;
+               while (frames) {
+                       skb = skb_dequeue(&ar->tx_pending[i]);
+                       if (unlikely(!skb)) {
+                               frames_failed += frames;
+                               frames = 0;
+                               break;
+                       }
+
+                       info = IEEE80211_SKB_CB(skb);
+                       arinfo = (void *) info->rate_driver_data;
+
+                       /* TODO: cancel stuck frames */
+                       arinfo->timeout = jiffies +
+                                         msecs_to_jiffies(AR9170_TX_TIMEOUT);
+
+                       if (info->flags & IEEE80211_TX_CTL_AMPDU)
+                               atomic_inc(&ar->tx_ampdu_pending);
+
+#ifdef AR9170_QUEUE_DEBUG
+                       printk(KERN_DEBUG "%s: send frame q:%d =>\n",
+                              wiphy_name(ar->hw->wiphy), i);
+                       ar9170_print_txheader(ar, skb);
+#endif /* AR9170_QUEUE_DEBUG */
+
+                       err = ar->tx(ar, skb);
+                       if (unlikely(err)) {
+                               if (info->flags & IEEE80211_TX_CTL_AMPDU)
+                                       atomic_dec(&ar->tx_ampdu_pending);
+
+                               frames_failed++;
+                               dev_kfree_skb_any(skb);
+                       } else {
+                               remaining_space--;
+                               schedule_garbagecollector = true;
+                       }
+
+                       frames--;
+               }
+
+#ifdef AR9170_QUEUE_DEBUG
+               printk(KERN_DEBUG "%s: ar9170_tx report for queue %d\n",
+                      wiphy_name(ar->hw->wiphy), i);
+
+               printk(KERN_DEBUG "%s: unprocessed pending frames left:\n",
+                      wiphy_name(ar->hw->wiphy));
+               ar9170_dump_txqueue(ar, &ar->tx_pending[i]);
+#endif /* AR9170_QUEUE_DEBUG */
+
+               if (unlikely(frames_failed)) {
+#ifdef AR9170_QUEUE_DEBUG
+                       printk(KERN_DEBUG "%s: frames failed %d =>\n",
+                              wiphy_name(ar->hw->wiphy), frames_failed);
+#endif /* AR9170_QUEUE_DEBUG */
 
-                       queue_delayed_work(ar->hw->workqueue,
-                                          &ar->tx_status_janitor,
-                                          msecs_to_jiffies(100));
+                       spin_lock_irqsave(&ar->tx_stats_lock, flags);
+                       ar->tx_stats[i].len -= frames_failed;
+                       ar->tx_stats[i].count -= frames_failed;
+#ifdef AR9170_QUEUE_STOP_DEBUG
+                       printk(KERN_DEBUG "%s: wake queue %d\n",
+                              wiphy_name(ar->hw->wiphy), i);
+                       __ar9170_dump_txstats(ar);
+#endif /* AR9170_QUEUE_STOP_DEBUG */
+                       ieee80211_wake_queue(ar->hw, i);
+                       spin_unlock_irqrestore(&ar->tx_stats_lock, flags);
                }
        }
 
-       err = ar->tx(ar, skb, tx_status, 0);
-       if (unlikely(tx_status && err)) {
-               if (info->control.sta)
-                       skb_unlink(skb, &sta_info->tx_status[queue]);
-               else
-                       skb_unlink(skb, &ar->global_tx_status);
+       if (!schedule_garbagecollector)
+               return;
+
+       ieee80211_queue_delayed_work(ar->hw,
+                                    &ar->tx_janitor,
+                                    msecs_to_jiffies(AR9170_JANITOR_DELAY));
+}
+
+static bool ar9170_tx_ampdu_queue(struct ar9170 *ar, struct sk_buff *skb)
+{
+       struct ieee80211_tx_info *txinfo;
+       struct ar9170_sta_info *sta_info;
+       struct ar9170_sta_tid *agg;
+       struct sk_buff *iter;
+       unsigned long flags, f2;
+       unsigned int max;
+       u16 tid, seq, qseq;
+       bool run = false, queue = false;
+
+       tid = ar9170_get_tid(skb);
+       seq = ar9170_get_seq(skb);
+       txinfo = IEEE80211_SKB_CB(skb);
+       sta_info = (void *) txinfo->control.sta->drv_priv;
+       agg = &sta_info->agg[tid];
+       max = sta_info->ampdu_max_len;
+
+       spin_lock_irqsave(&ar->tx_ampdu_list_lock, flags);
+
+       if (unlikely(agg->state != AR9170_TID_STATE_COMPLETE)) {
+#ifdef AR9170_TXAGG_DEBUG
+               printk(KERN_DEBUG "%s: BlockACK session not fully initialized "
+                      "for ESS:%pM tid:%d state:%d.\n",
+                      wiphy_name(ar->hw->wiphy), agg->addr, agg->tid,
+                      agg->state);
+#endif /* AR9170_TXAGG_DEBUG */
+               goto err_unlock;
        }
 
-       return NETDEV_TX_OK;
+       if (!agg->active) {
+               agg->active = true;
+               agg->ssn = seq;
+               queue = true;
+       }
 
-err_dequeue:
-       spin_lock_irqsave(&ar->tx_stats_lock, flags);
-       ar->tx_stats[queue].len--;
-       ar->tx_stats[queue].count--;
-       spin_unlock_irqrestore(&ar->tx_stats_lock, flags);
+       /* check if seq is within the BA window */
+       if (unlikely(!BAW_WITHIN(agg->ssn, max, seq))) {
+#ifdef AR9170_TXAGG_DEBUG
+               printk(KERN_DEBUG "%s: frame with tid:%d seq:%d does not "
+                      "fit into BA window (%d - %d)\n",
+                      wiphy_name(ar->hw->wiphy), tid, seq, agg->ssn,
+                      (agg->ssn + max) & 0xfff);
+#endif /* AR9170_TXAGG_DEBUG */
+               goto err_unlock;
+       }
+
+       spin_lock_irqsave(&agg->queue.lock, f2);
+
+       skb_queue_reverse_walk(&agg->queue, iter) {
+               qseq = ar9170_get_seq(iter);
+
+               if (GET_NEXT_SEQ(qseq) == seq) {
+                       __skb_queue_after(&agg->queue, iter, skb);
+                       goto queued;
+               }
+       }
+
+       __skb_queue_head(&agg->queue, skb);
+
+queued:
+       spin_unlock_irqrestore(&agg->queue.lock, f2);
+
+#ifdef AR9170_TXAGG_DEBUG
+       printk(KERN_DEBUG "%s: new aggregate %p queued.\n",
+              wiphy_name(ar->hw->wiphy), skb);
+       __ar9170_dump_txqueue(ar, &agg->queue);
+#endif /* AR9170_TXAGG_DEBUG */
+
+       if (skb_queue_len(&agg->queue) >= AR9170_NUM_TX_AGG_MAX)
+               run = true;
+
+       if (queue)
+               list_add_tail(&agg->list, &ar->tx_ampdu_list);
+
+       spin_unlock_irqrestore(&ar->tx_ampdu_list_lock, flags);
+       return run;
+
+err_unlock:
+       spin_unlock_irqrestore(&ar->tx_ampdu_list_lock, flags);
+       dev_kfree_skb_irq(skb);
+       return false;
+}
+
+int ar9170_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
+{
+       struct ar9170 *ar = hw->priv;
+       struct ieee80211_tx_info *info;
+
+       if (unlikely(!IS_STARTED(ar)))
+               goto err_free;
+
+       if (unlikely(ar9170_tx_prepare(ar, skb)))
+               goto err_free;
+
+       info = IEEE80211_SKB_CB(skb);
+       if (info->flags & IEEE80211_TX_CTL_AMPDU) {
+               bool run = ar9170_tx_ampdu_queue(ar, skb);
+
+               if (run || !atomic_read(&ar->tx_ampdu_pending))
+                       ar9170_tx_ampdu(ar);
+       } else {
+               unsigned int queue = skb_get_queue_mapping(skb);
+
+               ar9170_tx_prepare_phy(ar, skb);
+               skb_queue_tail(&ar->tx_pending[queue], skb);
+       }
+
+       ar9170_tx(ar);
+       return NETDEV_TX_OK;
 
 err_free:
-       dev_kfree_skb(skb);
+       dev_kfree_skb_any(skb);
        return NETDEV_TX_OK;
 }
 
 static int ar9170_op_add_interface(struct ieee80211_hw *hw,
-                                  struct ieee80211_if_init_conf *conf)
+                                  struct ieee80211_vif *vif)
 {
        struct ar9170 *ar = hw->priv;
+       struct ath_common *common = &ar->common;
        int err = 0;
 
        mutex_lock(&ar->mutex);
@@ -1260,8 +1952,8 @@ static int ar9170_op_add_interface(struct ieee80211_hw *hw,
                goto unlock;
        }
 
-       ar->vif = conf->vif;
-       memcpy(ar->mac_addr, conf->mac_addr, ETH_ALEN);
+       ar->vif = vif;
+       memcpy(common->macaddr, vif->addr, ETH_ALEN);
 
        if (modparam_nohwcrypt || (ar->vif->type != NL80211_IFTYPE_STATION)) {
                ar->rx_software_decryption = true;
@@ -1269,8 +1961,7 @@ static int ar9170_op_add_interface(struct ieee80211_hw *hw,
        }
 
        ar->cur_filter = 0;
-       ar->want_filter = AR9170_MAC_REG_FTF_DEFAULTS;
-       err = ar9170_update_frame_filter(ar);
+       err = ar9170_update_frame_filter(ar, AR9170_MAC_REG_FTF_DEFAULTS);
        if (err)
                goto unlock;
 
@@ -1282,14 +1973,13 @@ unlock:
 }
 
 static void ar9170_op_remove_interface(struct ieee80211_hw *hw,
-                                      struct ieee80211_if_init_conf *conf)
+                                      struct ieee80211_vif *vif)
 {
        struct ar9170 *ar = hw->priv;
 
        mutex_lock(&ar->mutex);
        ar->vif = NULL;
-       ar->want_filter = 0;
-       ar9170_update_frame_filter(ar);
+       ar9170_update_frame_filter(ar, 0);
        ar9170_set_beacon_timers(ar);
        dev_kfree_skb(ar->beacon);
        ar->beacon = NULL;
@@ -1306,11 +1996,6 @@ static int ar9170_op_config(struct ieee80211_hw *hw, u32 changed)
 
        mutex_lock(&ar->mutex);
 
-       if (changed & IEEE80211_CONF_CHANGE_RADIO_ENABLED) {
-               /* TODO */
-               err = 0;
-       }
-
        if (changed & IEEE80211_CONF_CHANGE_LISTEN_INTERVAL) {
                /* TODO */
                err = 0;
@@ -1337,12 +2022,6 @@ static int ar9170_op_config(struct ieee80211_hw *hw, u32 changed)
                        goto out;
        }
 
-       if (changed & BSS_CHANGED_BEACON_INT) {
-               err = ar9170_set_beacon_timers(ar);
-               if (err)
-                       goto out;
-       }
-
        if (changed & IEEE80211_CONF_CHANGE_CHANNEL) {
 
                /* adjust slot time for 5 GHz */
@@ -1366,42 +2045,37 @@ out:
        return err;
 }
 
-static void ar9170_set_filters(struct work_struct *work)
+static u64 ar9170_op_prepare_multicast(struct ieee80211_hw *hw, int mc_count,
+                                      struct dev_addr_list *mclist)
 {
-       struct ar9170 *ar = container_of(work, struct ar9170,
-                                        filter_config_work);
-       int err;
-
-       if (unlikely(!IS_STARTED(ar)))
-               return ;
+       u64 mchash;
+       int i;
 
-       mutex_lock(&ar->mutex);
-       if (ar->filter_changed & AR9170_FILTER_CHANGED_PROMISC) {
-               err = ar9170_set_operating_mode(ar);
-               if (err)
-                       goto unlock;
-       }
+       /* always get broadcast frames */
+       mchash = 1ULL << (0xff >> 2);
 
-       if (ar->filter_changed & AR9170_FILTER_CHANGED_MULTICAST) {
-               err = ar9170_update_multicast(ar);
-               if (err)
-                       goto unlock;
+       for (i = 0; i < mc_count; i++) {
+               if (WARN_ON(!mclist))
+                       break;
+               mchash |= 1ULL << (mclist->dmi_addr[5] >> 2);
+               mclist = mclist->next;
        }
 
-       if (ar->filter_changed & AR9170_FILTER_CHANGED_FRAMEFILTER)
-               err = ar9170_update_frame_filter(ar);
-
-unlock:
-       mutex_unlock(&ar->mutex);
+       return mchash;
 }
 
 static void ar9170_op_configure_filter(struct ieee80211_hw *hw,
                                       unsigned int changed_flags,
                                       unsigned int *new_flags,
-                                      int mc_count, struct dev_mc_list *mclist)
+                                      u64 multicast)
 {
        struct ar9170 *ar = hw->priv;
 
+       if (unlikely(!IS_ACCEPTING_CMD(ar)))
+               return ;
+
+       mutex_lock(&ar->mutex);
+
        /* mask supported flags */
        *new_flags &= FIF_ALLMULTI | FIF_CONTROL | FIF_BCN_PRBRESP_PROMISC |
                      FIF_PROMISC_IN_BSS | FIF_FCSFAIL | FIF_PLCPFAIL;
@@ -1411,26 +2085,11 @@ static void ar9170_op_configure_filter(struct ieee80211_hw *hw,
         * then checking the error flags, later.
         */
 
-       if (changed_flags & FIF_ALLMULTI) {
-               if (*new_flags & FIF_ALLMULTI) {
-                       ar->want_mc_hash = ~0ULL;
-               } else {
-                       u64 mchash;
-                       int i;
-
-                       /* always get broadcast frames */
-                       mchash = 1ULL << (0xff>>2);
+       if (changed_flags & FIF_ALLMULTI && *new_flags & FIF_ALLMULTI)
+               multicast = ~0ULL;
 
-                       for (i = 0; i < mc_count; i++) {
-                               if (WARN_ON(!mclist))
-                                       break;
-                               mchash |= 1ULL << (mclist->dmi_addr[5] >> 2);
-                               mclist = mclist->next;
-                       }
-               ar->want_mc_hash = mchash;
-               }
-               ar->filter_changed |= AR9170_FILTER_CHANGED_MULTICAST;
-       }
+       if (multicast != ar->cur_mc_hash)
+               ar9170_update_multicast(ar, multicast);
 
        if (changed_flags & FIF_CONTROL) {
                u32 filter = AR9170_MAC_REG_FTF_PSPOLL |
@@ -1441,64 +2100,63 @@ static void ar9170_op_configure_filter(struct ieee80211_hw *hw,
                             AR9170_MAC_REG_FTF_CFE_ACK;
 
                if (*new_flags & FIF_CONTROL)
-                       ar->want_filter = ar->cur_filter | filter;
+                       filter |= ar->cur_filter;
                else
-                       ar->want_filter = ar->cur_filter & ~filter;
+                       filter &= (~ar->cur_filter);
 
-               ar->filter_changed |= AR9170_FILTER_CHANGED_FRAMEFILTER;
+               ar9170_update_frame_filter(ar, filter);
        }
 
        if (changed_flags & FIF_PROMISC_IN_BSS) {
                ar->sniffer_enabled = ((*new_flags) & FIF_PROMISC_IN_BSS) != 0;
-               ar->filter_changed |= AR9170_FILTER_CHANGED_PROMISC;
+               ar9170_set_operating_mode(ar);
        }
 
-       if (likely(IS_STARTED(ar)))
-               queue_work(ar->hw->workqueue, &ar->filter_config_work);
+       mutex_unlock(&ar->mutex);
 }
 
+
 static void ar9170_op_bss_info_changed(struct ieee80211_hw *hw,
                                       struct ieee80211_vif *vif,
                                       struct ieee80211_bss_conf *bss_conf,
                                       u32 changed)
 {
        struct ar9170 *ar = hw->priv;
+       struct ath_common *common = &ar->common;
        int err = 0;
 
        mutex_lock(&ar->mutex);
 
        if (changed & BSS_CHANGED_BSSID) {
-               memcpy(ar->bssid, bss_conf->bssid, ETH_ALEN);
+               memcpy(common->curbssid, bss_conf->bssid, ETH_ALEN);
                err = ar9170_set_operating_mode(ar);
                if (err)
                        goto out;
        }
 
-       if (changed & (BSS_CHANGED_BEACON | BSS_CHANGED_BEACON_ENABLED)) {
+       if (changed & BSS_CHANGED_BEACON_ENABLED)
+               ar->enable_beacon = bss_conf->enable_beacon;
+
+       if (changed & BSS_CHANGED_BEACON) {
                err = ar9170_update_beacon(ar);
                if (err)
                        goto out;
+       }
 
+       if (changed & (BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_BEACON |
+                      BSS_CHANGED_BEACON_INT)) {
                err = ar9170_set_beacon_timers(ar);
                if (err)
                        goto out;
        }
 
        if (changed & BSS_CHANGED_ASSOC) {
-               ar->state = bss_conf->assoc ? AR9170_ASSOCIATED : ar->state;
-
 #ifndef CONFIG_AR9170_LEDS
                /* enable assoc LED. */
                err = ar9170_set_leds_state(ar, bss_conf->assoc ? 2 : 0);
 #endif /* CONFIG_AR9170_LEDS */
        }
 
-       if (changed & BSS_CHANGED_BEACON_INT) {
-               err = ar9170_set_beacon_timers(ar);
-               if (err)
-                       goto out;
-       }
-
        if (changed & BSS_CHANGED_HT) {
                /* TODO */
                err = 0;
@@ -1524,22 +2182,30 @@ static u64 ar9170_op_get_tsf(struct ieee80211_hw *hw)
 {
        struct ar9170 *ar = hw->priv;
        int err;
-       u32 tsf_low;
-       u32 tsf_high;
        u64 tsf;
+#define NR 3
+       static const u32 addr[NR] = { AR9170_MAC_REG_TSF_H,
+                                   AR9170_MAC_REG_TSF_L,
+                                   AR9170_MAC_REG_TSF_H };
+       u32 val[NR];
+       int loops = 0;
 
        mutex_lock(&ar->mutex);
-       err = ar9170_read_reg(ar, AR9170_MAC_REG_TSF_L, &tsf_low);
-       if (!err)
-               err = ar9170_read_reg(ar, AR9170_MAC_REG_TSF_H, &tsf_high);
+
+       while (loops++ < 10) {
+               err = ar9170_read_mreg(ar, NR, addr, val);
+               if (err || val[0] == val[2])
+                       break;
+       }
+
        mutex_unlock(&ar->mutex);
 
        if (WARN_ON(err))
                return 0;
-
-       tsf = tsf_high;
-       tsf = (tsf << 32) | tsf_low;
+       tsf = val[0];
+       tsf = (tsf << 32) | val[1];
        return tsf;
+#undef NR
 }
 
 static int ar9170_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
@@ -1663,48 +2329,55 @@ out:
        return err;
 }
 
-static void ar9170_sta_notify(struct ieee80211_hw *hw,
-                             struct ieee80211_vif *vif,
-                             enum sta_notify_cmd cmd,
-                             struct ieee80211_sta *sta)
+static int ar9170_sta_add(struct ieee80211_hw *hw,
+                         struct ieee80211_vif *vif,
+                         struct ieee80211_sta *sta)
 {
        struct ar9170 *ar = hw->priv;
-       struct ar9170_sta_info *info = (void *) sta->drv_priv;
-       struct sk_buff *skb;
+       struct ar9170_sta_info *sta_info = (void *) sta->drv_priv;
        unsigned int i;
 
-       switch (cmd) {
-       case STA_NOTIFY_ADD:
-               for (i = 0; i < ar->hw->queues; i++)
-                       skb_queue_head_init(&info->tx_status[i]);
-               break;
+       memset(sta_info, 0, sizeof(*sta_info));
 
-       case STA_NOTIFY_REMOVE:
+       if (!sta->ht_cap.ht_supported)
+               return 0;
 
-               /*
-                * transfer all outstanding frames that need a tx_status
-                * reports to the global tx_status queue
-                */
+       if (sta->ht_cap.ampdu_density > ar->global_ampdu_density)
+               ar->global_ampdu_density = sta->ht_cap.ampdu_density;
 
-               for (i = 0; i < ar->hw->queues; i++) {
-                       while ((skb = skb_dequeue(&info->tx_status[i]))) {
-#ifdef AR9170_QUEUE_DEBUG
-                               printk(KERN_DEBUG "%s: queueing frame in "
-                                         "global tx_status queue =>\n",
-                                      wiphy_name(ar->hw->wiphy));
+       if (sta->ht_cap.ampdu_factor < ar->global_ampdu_factor)
+               ar->global_ampdu_factor = sta->ht_cap.ampdu_factor;
 
-                               ar9170_print_txheader(ar, skb);
-#endif /* AR9170_QUEUE_DEBUG */
-                               skb_queue_tail(&ar->global_tx_status, skb);
-                       }
-               }
-               queue_delayed_work(ar->hw->workqueue, &ar->tx_status_janitor,
-                                  msecs_to_jiffies(100));
-               break;
+       for (i = 0; i < AR9170_NUM_TID; i++) {
+               sta_info->agg[i].state = AR9170_TID_STATE_SHUTDOWN;
+               sta_info->agg[i].active = false;
+               sta_info->agg[i].ssn = 0;
+               sta_info->agg[i].tid = i;
+               INIT_LIST_HEAD(&sta_info->agg[i].list);
+               skb_queue_head_init(&sta_info->agg[i].queue);
+       }
 
-       default:
-               break;
+       sta_info->ampdu_max_len = 1 << (3 + sta->ht_cap.ampdu_factor);
+
+       return 0;
+}
+
+static int ar9170_sta_remove(struct ieee80211_hw *hw,
+                            struct ieee80211_vif *vif,
+                            struct ieee80211_sta *sta)
+{
+       struct ar9170_sta_info *sta_info = (void *) sta->drv_priv;
+       unsigned int i;
+
+       if (!sta->ht_cap.ht_supported)
+               return 0;
+
+       for (i = 0; i < AR9170_NUM_TID; i++) {
+               sta_info->agg[i].state = AR9170_TID_STATE_INVALID;
+               skb_queue_purge(&sta_info->agg[i].queue);
        }
+
+       return 0;
 }
 
 static int ar9170_get_stats(struct ieee80211_hw *hw,
@@ -1724,18 +2397,6 @@ static int ar9170_get_stats(struct ieee80211_hw *hw,
        return 0;
 }
 
-static int ar9170_get_tx_stats(struct ieee80211_hw *hw,
-                              struct ieee80211_tx_queue_stats *tx_stats)
-{
-       struct ar9170 *ar = hw->priv;
-
-       spin_lock_bh(&ar->tx_stats_lock);
-       memcpy(tx_stats, ar->tx_stats, sizeof(tx_stats[0]) * hw->queues);
-       spin_unlock_bh(&ar->tx_stats_lock);
-
-       return 0;
-}
-
 static int ar9170_conf_tx(struct ieee80211_hw *hw, u16 queue,
                          const struct ieee80211_tx_queue_params *param)
 {
@@ -1743,34 +2404,83 @@ static int ar9170_conf_tx(struct ieee80211_hw *hw, u16 queue,
        int ret;
 
        mutex_lock(&ar->mutex);
-       if ((param) && !(queue > ar->hw->queues)) {
+       if (queue < __AR9170_NUM_TXQ) {
                memcpy(&ar->edcf[ar9170_qos_hwmap[queue]],
                       param, sizeof(*param));
 
                ret = ar9170_set_qos(ar);
-       } else
+       } else {
                ret = -EINVAL;
+       }
 
        mutex_unlock(&ar->mutex);
        return ret;
 }
 
 static int ar9170_ampdu_action(struct ieee80211_hw *hw,
+                              struct ieee80211_vif *vif,
                               enum ieee80211_ampdu_mlme_action action,
                               struct ieee80211_sta *sta, u16 tid, u16 *ssn)
 {
+       struct ar9170 *ar = hw->priv;
+       struct ar9170_sta_info *sta_info = (void *) sta->drv_priv;
+       struct ar9170_sta_tid *tid_info = &sta_info->agg[tid];
+       unsigned long flags;
+
+       if (!modparam_ht)
+               return -EOPNOTSUPP;
+
        switch (action) {
+       case IEEE80211_AMPDU_TX_START:
+               spin_lock_irqsave(&ar->tx_ampdu_list_lock, flags);
+               if (tid_info->state != AR9170_TID_STATE_SHUTDOWN ||
+                   !list_empty(&tid_info->list)) {
+                       spin_unlock_irqrestore(&ar->tx_ampdu_list_lock, flags);
+#ifdef AR9170_TXAGG_DEBUG
+                       printk(KERN_INFO "%s: A-MPDU [ESS:[%pM] tid:[%d]] "
+                              "is in a very bad state!\n",
+                              wiphy_name(hw->wiphy), sta->addr, tid);
+#endif /* AR9170_TXAGG_DEBUG */
+                       return -EBUSY;
+               }
+
+               *ssn = tid_info->ssn;
+               tid_info->state = AR9170_TID_STATE_PROGRESS;
+               tid_info->active = false;
+               spin_unlock_irqrestore(&ar->tx_ampdu_list_lock, flags);
+               ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid);
+               break;
+
+       case IEEE80211_AMPDU_TX_STOP:
+               spin_lock_irqsave(&ar->tx_ampdu_list_lock, flags);
+               tid_info->state = AR9170_TID_STATE_SHUTDOWN;
+               list_del_init(&tid_info->list);
+               tid_info->active = false;
+               skb_queue_purge(&tid_info->queue);
+               spin_unlock_irqrestore(&ar->tx_ampdu_list_lock, flags);
+               ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid);
+               break;
+
+       case IEEE80211_AMPDU_TX_OPERATIONAL:
+#ifdef AR9170_TXAGG_DEBUG
+               printk(KERN_INFO "%s: A-MPDU for %pM [tid:%d] Operational.\n",
+                      wiphy_name(hw->wiphy), sta->addr, tid);
+#endif /* AR9170_TXAGG_DEBUG */
+               spin_lock_irqsave(&ar->tx_ampdu_list_lock, flags);
+               sta_info->agg[tid].state = AR9170_TID_STATE_COMPLETE;
+               spin_unlock_irqrestore(&ar->tx_ampdu_list_lock, flags);
+               break;
+
        case IEEE80211_AMPDU_RX_START:
        case IEEE80211_AMPDU_RX_STOP:
-               /*
-                * Something goes wrong -- RX locks up
-                * after a while of receiving aggregated
-                * frames -- not enabling for now.
-                */
-               return -EOPNOTSUPP;
+               /* Handled by firmware */
+               break;
+
        default:
                return -EOPNOTSUPP;
        }
+
+       return 0;
 }
 
 static const struct ieee80211_ops ar9170_ops = {
@@ -1780,14 +2490,15 @@ static const struct ieee80211_ops ar9170_ops = {
        .add_interface          = ar9170_op_add_interface,
        .remove_interface       = ar9170_op_remove_interface,
        .config                 = ar9170_op_config,
+       .prepare_multicast      = ar9170_op_prepare_multicast,
        .configure_filter       = ar9170_op_configure_filter,
        .conf_tx                = ar9170_conf_tx,
        .bss_info_changed       = ar9170_op_bss_info_changed,
        .get_tsf                = ar9170_op_get_tsf,
        .set_key                = ar9170_set_key,
-       .sta_notify             = ar9170_sta_notify,
+       .sta_add                = ar9170_sta_add,
+       .sta_remove             = ar9170_sta_remove,
        .get_stats              = ar9170_get_stats,
-       .get_tx_stats           = ar9170_get_tx_stats,
        .ampdu_action           = ar9170_ampdu_action,
 };
 
@@ -1801,7 +2512,7 @@ void *ar9170_alloc(size_t priv_size)
        /*
         * this buffer is used for rx stream reconstruction.
         * Under heavy load this device (or the transport layer?)
-        * tends to split the streams into seperate rx descriptors.
+        * tends to split the streams into separate rx descriptors.
         */
 
        skb = __dev_alloc_skb(AR9170_MAX_RX_BUFFER_SIZE, GFP_KERNEL);
@@ -1819,12 +2530,16 @@ void *ar9170_alloc(size_t priv_size)
        mutex_init(&ar->mutex);
        spin_lock_init(&ar->cmdlock);
        spin_lock_init(&ar->tx_stats_lock);
-       skb_queue_head_init(&ar->global_tx_status);
-       skb_queue_head_init(&ar->global_tx_status_waste);
+       spin_lock_init(&ar->tx_ampdu_list_lock);
+       skb_queue_head_init(&ar->tx_status_ampdu);
+       for (i = 0; i < __AR9170_NUM_TXQ; i++) {
+               skb_queue_head_init(&ar->tx_status[i]);
+               skb_queue_head_init(&ar->tx_pending[i]);
+       }
        ar9170_rx_reset_rx_mpdu(ar);
-       INIT_WORK(&ar->filter_config_work, ar9170_set_filters);
        INIT_WORK(&ar->beacon_work, ar9170_new_beacon);
-       INIT_DELAYED_WORK(&ar->tx_status_janitor, ar9170_tx_status_janitor);
+       INIT_DELAYED_WORK(&ar->tx_janitor, ar9170_tx_janitor);
+       INIT_LIST_HEAD(&ar->tx_ampdu_list);
 
        /* all hw supports 2.4 GHz, so set channel to 1 by default */
        ar->channel = &ar9170_2ghz_chantable[0];
@@ -1838,6 +2553,13 @@ void *ar9170_alloc(size_t priv_size)
                         IEEE80211_HW_SIGNAL_DBM |
                         IEEE80211_HW_NOISE_DBM;
 
+       if (modparam_ht) {
+               ar->hw->flags |= IEEE80211_HW_AMPDU_AGGREGATION;
+       } else {
+               ar9170_band_2GHz.ht_cap.ht_supported = false;
+               ar9170_band_5GHz.ht_cap.ht_supported = false;
+       }
+
        ar->hw->queues = __AR9170_NUM_TXQ;
        ar->hw->extra_tx_headroom = 8;
        ar->hw->sta_data_size = sizeof(struct ar9170_sta_info);
@@ -1859,10 +2581,11 @@ static int ar9170_read_eeprom(struct ar9170 *ar)
 {
 #define RW     8       /* number of words to read at once */
 #define RB     (sizeof(u32) * RW)
-       DECLARE_MAC_BUF(mbuf);
+       struct ath_regulatory *regulatory = &ar->common.regulatory;
        u8 *eeprom = (void *)&ar->eeprom;
        u8 *addr = ar->eeprom.mac_address;
        __le32 offsets[RW];
+       unsigned int rx_streams, tx_streams, tx_params = 0;
        int i, j, err, bands = 0;
 
        BUILD_BUG_ON(sizeof(ar->eeprom) & 3);
@@ -1899,6 +2622,20 @@ static int ar9170_read_eeprom(struct ar9170 *ar)
                ar->hw->wiphy->bands[IEEE80211_BAND_5GHZ] = &ar9170_band_5GHz;
                bands++;
        }
+
+       rx_streams = hweight8(ar->eeprom.rx_mask);
+       tx_streams = hweight8(ar->eeprom.tx_mask);
+
+       if (rx_streams != tx_streams)
+               tx_params = IEEE80211_HT_MCS_TX_RX_DIFF;
+
+       if (tx_streams >= 1 && tx_streams <= IEEE80211_HT_MCS_TX_MAX_STREAMS)
+               tx_params = (tx_streams - 1) <<
+                           IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT;
+
+       ar9170_band_2GHz.ht_cap.mcs.tx_params |= tx_params;
+       ar9170_band_5GHz.ht_cap.mcs.tx_params |= tx_params;
+
        /*
         * I measured this, a bandswitch takes roughly
         * 135 ms and a frequency switch about 80.
@@ -1911,8 +2648,8 @@ static int ar9170_read_eeprom(struct ar9170 *ar)
        else
                ar->hw->channel_change_time = 80 * 1000;
 
-       ar->regulatory.current_rd = le16_to_cpu(ar->eeprom.reg_domain[0]);
-       ar->regulatory.current_rd_ext = le16_to_cpu(ar->eeprom.reg_domain[1]);
+       regulatory->current_rd = le16_to_cpu(ar->eeprom.reg_domain[0]);
+       regulatory->current_rd_ext = le16_to_cpu(ar->eeprom.reg_domain[1]);
 
        /* second part of wiphy init */
        SET_IEEE80211_PERM_ADDR(ar->hw, addr);
@@ -1926,11 +2663,12 @@ static int ar9170_reg_notifier(struct wiphy *wiphy,
        struct ieee80211_hw *hw = wiphy_to_ieee80211_hw(wiphy);
        struct ar9170 *ar = hw->priv;
 
-       return ath_reg_notifier_apply(wiphy, request, &ar->regulatory);
+       return ath_reg_notifier_apply(wiphy, request, &ar->common.regulatory);
 }
 
 int ar9170_register(struct ar9170 *ar, struct device *pdev)
 {
+       struct ath_regulatory *regulatory = &ar->common.regulatory;
        int err;
 
        /* try to read EEPROM, init MAC addr */
@@ -1938,7 +2676,7 @@ int ar9170_register(struct ar9170 *ar, struct device *pdev)
        if (err)
                goto err_out;
 
-       err = ath_regd_init(&ar->regulatory, ar->hw->wiphy,
+       err = ath_regd_init(regulatory, ar->hw->wiphy,
                            ar9170_reg_notifier);
        if (err)
                goto err_out;
@@ -1947,8 +2685,8 @@ int ar9170_register(struct ar9170 *ar, struct device *pdev)
        if (err)
                goto err_out;
 
-       if (!ath_is_world_regd(&ar->regulatory))
-               regulatory_hint(ar->hw->wiphy, ar->regulatory.alpha2);
+       if (!ath_is_world_regd(regulatory))
+               regulatory_hint(ar->hw->wiphy, regulatory->alpha2);
 
        err = ar9170_init_leds(ar);
        if (err)