x86/amd-iommu: Dump fault entry on DTE error

[safe/jmp/linux-2.6] / arch / x86 / kernel / amd_iommu.c
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c

index 8ff02ee..364c6de 100644 (file)
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -58,6 +58,13 @@ static struct dma_ops_domain *find_protection_domain(u16 devid);
  static u64* alloc_pte(struct protection_domain *dom,
                       unsigned long address, u64
                       **pte_page, gfp_t gfp);
+static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
+                                     unsigned long start_page,
+                                     unsigned int pages);
+
+#ifndef BUS_NOTIFY_UNBOUND_DRIVER
+/*
+ * Local fallback for the notifier action tested in device_change_notifier();
+ * it only takes effect when no earlier header has defined the name.
+ * NOTE(review): 0x0005 presumably has to match the driver core's value for
+ * this event - confirm against the tree this patch is applied to.
+ */
+#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005
+#endif
  
  #ifdef CONFIG_AMD_IOMMU_STATS
  
@@ -131,6 +138,15 @@ static int iommu_has_npcache(struct amd_iommu *iommu)
   *
   ****************************************************************************/
  
+/*
+ * Print each of the eight data[] words of the device table entry that
+ * belongs to @devid, one pr_err() line per word.
+ */
+static void dump_dte_entry(u16 devid)
+{
+       int word = 0;
+
+       while (word < 8) {
+               pr_err("AMD-Vi: DTE[%d]: %08x\n", word,
+                       amd_iommu_dev_table[devid].data[word]);
+               ++word;
+       }
+}
+
  static void iommu_print_event(void *__evt)
  {
         u32 *event = __evt;
@@ -148,6 +164,7 @@ static void iommu_print_event(void *__evt)
                        "address=0x%016llx flags=0x%04x]\n",
                        PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
                        address, flags);
+               dump_dte_entry(devid);
                 break;
         case EVENT_TYPE_IO_FAULT:
                 printk("IO_PAGE_FAULT device=%02x:%02x.%x "
@@ -215,7 +232,7 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
  {
         struct amd_iommu *iommu;
  
-       list_for_each_entry(iommu, &amd_iommu_list, list)
+       for_each_iommu(iommu)
                 iommu_poll_events(iommu);
  
         return IRQ_HANDLED;
@@ -427,6 +444,16 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
         iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
  }
  
+/*
+ * Flush the complete IO/TLB of protection domain @domid on @iommu,
+ * including PDE: the invalidation is queued for the all-pages address with
+ * the fourth argument set to 1, where iommu_flush_tlb() passes 0.
+ */
+static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid)
+{
+       const u64 all_pages = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
+
+       INC_STATS_COUNTER(domain_flush_single);
+       iommu_queue_inv_iommu_pages(iommu, all_pages, domid, 1, 1);
+}
+
  /*
   * This function is used to flush the IO/TLB for a given protection domain
   * on every IOMMU in the system
@@ -442,7 +469,7 @@ static void iommu_flush_domain(u16 domid)
         __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
                                       domid, 1, 1);
  
-       list_for_each_entry(iommu, &amd_iommu_list, list) {
+       for_each_iommu(iommu) {
                 spin_lock_irqsave(&iommu->lock, flags);
                 __iommu_queue_command(iommu, &cmd);
                 __iommu_completion_wait(iommu);
@@ -451,6 +478,35 @@ static void iommu_flush_domain(u16 domid)
         }
  }
  
+/*
+ * Call iommu_flush_domain() for every domain id in [1, MAX_DOMAIN_ID) whose
+ * bit is set in amd_iommu_pd_alloc_bitmap.
+ */
+void amd_iommu_flush_all_domains(void)
+{
+       int domid;
+
+       for (domid = 1; domid < MAX_DOMAIN_ID; domid++) {
+               if (test_bit(domid, amd_iommu_pd_alloc_bitmap))
+                       iommu_flush_domain(domid);
+       }
+}
+
+/*
+ * Walk all device ids up to and including amd_iommu_last_bdf. For each id
+ * that has an entry in amd_iommu_pd_table and an IOMMU in
+ * amd_iommu_rlookup_table, queue a device-entry invalidation on that IOMMU
+ * and then call iommu_completion_wait() on it.
+ */
+void amd_iommu_flush_all_devices(void)
+{
+       int devid;
+
+       for (devid = 0; devid <= amd_iommu_last_bdf; devid++) {
+               struct amd_iommu *iommu;
+
+               /* No protection domain recorded for this id: nothing to do */
+               if (!amd_iommu_pd_table[devid])
+                       continue;
+
+               iommu = amd_iommu_rlookup_table[devid];
+               if (iommu) {
+                       iommu_queue_inv_dev_entry(iommu, devid);
+                       iommu_completion_wait(iommu);
+               }
+       }
+}
+
  /****************************************************************************
   *
   * The functions below are used the create the page table mappings for
@@ -621,14 +677,46 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
   */
  
  /*
+ * This function checks if there is a PTE for a given dma address. If
+ * there is one, it returns the pointer to it.
+ */
+static u64* fetch_pte(struct protection_domain *domain,
+                     unsigned long address)
+{
+       u64 *table, *entry;
+
+       /* Top level: the L2 slot in the domain's root table */
+       entry = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)];
+       if (!IOMMU_PTE_PRESENT(*entry))
+               return NULL;
+
+       /* Middle level: follow the L2 entry and pick the L1 slot */
+       table = IOMMU_PTE_PAGE(*entry);
+       entry = &table[IOMMU_PTE_L1_INDEX(address)];
+       if (!IOMMU_PTE_PRESENT(*entry))
+               return NULL;
+
+       /*
+        * Last level: the L0 slot is handed back without a present check,
+        * so callers have to test the returned entry themselves.
+        */
+       table = IOMMU_PTE_PAGE(*entry);
+       return &table[IOMMU_PTE_L0_INDEX(address)];
+}
+
+/*
   * This function is used to add a new aperture range to an existing
   * aperture in case of dma_ops domain allocation or address allocation
   * failure.
   */
-static int alloc_new_range(struct dma_ops_domain *dma_dom,
+static int alloc_new_range(struct amd_iommu *iommu,
+                          struct dma_ops_domain *dma_dom,
                            bool populate, gfp_t gfp)
  {
         int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
+       int i;
+
+#ifdef CONFIG_IOMMU_STRESS
+       populate = false;
+#endif
  
         if (index >= APERTURE_MAX_RANGES)
                 return -ENOMEM;
@@ -662,6 +750,33 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
  
         dma_dom->aperture_size += APERTURE_RANGE_SIZE;
  
+       /* Initialize the exclusion range if necessary */
+       if (iommu->exclusion_start &&
+           iommu->exclusion_start >= dma_dom->aperture[index]->offset &&
+           iommu->exclusion_start < dma_dom->aperture_size) {
+               unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
+               int pages = iommu_num_pages(iommu->exclusion_start,
+                                           iommu->exclusion_length,
+                                           PAGE_SIZE);
+               dma_ops_reserve_addresses(dma_dom, startpage, pages);
+       }
+
+       /*
+        * Check for areas already mapped as present in the new aperture
+        * range and mark those pages as reserved in the allocator. Such
+        * mappings may already exist as a result of requested unity
+        * mappings for devices.
+        */
+       for (i = dma_dom->aperture[index]->offset;
+            i < dma_dom->aperture_size;
+            i += PAGE_SIZE) {
+               u64 *pte = fetch_pte(&dma_dom->domain, i);
+               if (!pte || !IOMMU_PTE_PRESENT(*pte))
+                       continue;
+
+               /*
+                * i is a byte address, while dma_ops_reserve_addresses()
+                * takes a page number - so shift right, not left.
+                */
+               dma_ops_reserve_addresses(dma_dom, i >> PAGE_SHIFT, 1);
+       }
+
         return 0;
  
  out_free:
@@ -725,6 +840,11 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
  {
         unsigned long address;
  
+#ifdef CONFIG_IOMMU_STRESS
+       dom->next_address = 0;
+       dom->need_flush = true;
+#endif
+
         address = dma_ops_area_alloc(dev, dom, pages, align_mask,
                                      dma_mask, dom->next_address);
  
@@ -757,6 +877,11 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
  
         BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
  
+#ifdef CONFIG_IOMMU_STRESS
+       if (i < 4)
+               return;
+#endif
+
         if (address >= dom->next_address)
                 dom->need_flush = true;
  
@@ -881,17 +1006,10 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
   * It also intializes the page table and the address allocator data
   * structures required for the dma_ops interface
   */
-static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
-                                                  unsigned order)
+static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
  {
         struct dma_ops_domain *dma_dom;
  
-       /*
-        * Currently the DMA aperture must be between 32 MB and 1GB in size
-        */
-       if ((order < 25) || (order > 30))
-               return NULL;
-
         dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
         if (!dma_dom)
                 return NULL;
@@ -911,7 +1029,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
         dma_dom->need_flush = false;
         dma_dom->target_dev = 0xffff;
  
-       if (alloc_new_range(dma_dom, true, GFP_KERNEL))
+       if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL))
                 goto free_dma_dom;
  
         /*
@@ -921,15 +1039,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
         dma_dom->aperture[0]->bitmap[0] = 1;
         dma_dom->next_address = 0;
  
-       /* Intialize the exclusion range if necessary */
-       if (iommu->exclusion_start &&
-           iommu->exclusion_start < dma_dom->aperture_size) {
-               unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
-               int pages = iommu_num_pages(iommu->exclusion_start,
-                                           iommu->exclusion_length,
-                                           PAGE_SIZE);
-               dma_ops_reserve_addresses(dma_dom, startpage, pages);
-       }
  
         return dma_dom;
  
@@ -989,7 +1098,13 @@ static void attach_device(struct amd_iommu *iommu,
         amd_iommu_pd_table[devid] = domain;
         write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
  
+       /*
+        * We might boot into a crash-kernel here. The crashed kernel
+        * left the caches in the IOMMU dirty. So we have to flush
+        * here to evict all dirty stuff.
+        */
         iommu_queue_inv_dev_entry(iommu, devid);
+       iommu_flush_tlb_pde(iommu, domain->id);
  }
  
  /*
@@ -1038,7 +1153,6 @@ static int device_change_notifier(struct notifier_block *nb,
         struct protection_domain *domain;
         struct dma_ops_domain *dma_domain;
         struct amd_iommu *iommu;
-       int order = amd_iommu_aperture_order;
         unsigned long flags;
  
         if (devid > amd_iommu_last_bdf)
@@ -1057,17 +1171,7 @@ static int device_change_notifier(struct notifier_block *nb,
                           "to a non-dma-ops domain\n", dev_name(dev));
  
         switch (action) {
-       case BUS_NOTIFY_BOUND_DRIVER:
-               if (domain)
-                       goto out;
-               dma_domain = find_protection_domain(devid);
-               if (!dma_domain)
-                       dma_domain = iommu->default_dom;
-               attach_device(iommu, &dma_domain->domain, devid);
-               printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
-                      "device %s\n", dma_domain->domain.id, dev_name(dev));
-               break;
-       case BUS_NOTIFY_UNBIND_DRIVER:
+       case BUS_NOTIFY_UNBOUND_DRIVER:
                 if (!domain)
                         goto out;
                 detach_device(domain, devid);
@@ -1077,7 +1181,7 @@ static int device_change_notifier(struct notifier_block *nb,
                 dma_domain = find_protection_domain(devid);
                 if (dma_domain)
                         goto out;
-               dma_domain = dma_ops_domain_alloc(iommu, order);
+               dma_domain = dma_ops_domain_alloc(iommu);
                 if (!dma_domain)
                         goto out;
                 dma_domain->target_dev = devid;
@@ -1098,7 +1202,7 @@ out:
         return 0;
  }
  
-struct notifier_block device_nb = {
+static struct notifier_block device_nb = {
         .notifier_call = device_change_notifier,
  };
  
@@ -1188,8 +1292,8 @@ static int get_device_resources(struct device *dev,
                         dma_dom = (*iommu)->default_dom;
                 *domain = &dma_dom->domain;
                 attach_device(*iommu, *domain, *bdf);
-               printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
-                               "device %s\n", (*domain)->id, dev_name(dev));
+               DUMP_printk("Using protection domain %d for device %s\n",
+                           (*domain)->id, dev_name(dev));
         }
  
         if (domain_for_device(_bdf) == NULL)
@@ -1354,10 +1458,26 @@ static dma_addr_t __map_single(struct device *dev,
         if (align)
                 align_mask = (1UL << get_order(size)) - 1;
  
+retry:
         address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
                                           dma_mask);
-       if (unlikely(address == bad_dma_address))
-               goto out;
+       if (unlikely(address == bad_dma_address)) {
+               /*
+                * setting next_address here will let the address
+                * allocator only scan the new allocated range in the
+                * first run. This is a small optimization.
+                */
+               dma_dom->next_address = dma_dom->aperture_size;
+
+               if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC))
+                       goto out;
+
+               /*
+                * aperture was successfully enlarged by 128 MB, try
+                * allocation again
+                */
+               goto retry;
+       }
  
         start = address;
         for (i = 0; i < pages; ++i) {
@@ -1653,7 +1773,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
         flag |= __GFP_ZERO;
         virt_addr = (void *)__get_free_pages(flag, get_order(size));
         if (!virt_addr)
-               return 0;
+               return NULL;
  
         paddr = virt_to_phys(virt_addr);
  
@@ -1673,8 +1793,10 @@ static void *alloc_coherent(struct device *dev, size_t size,
         *dma_addr = __map_single(dev, iommu, domain->priv, paddr,
                                  size, DMA_BIDIRECTIONAL, true, dma_mask);
  
-       if (*dma_addr == bad_dma_address)
+       if (*dma_addr == bad_dma_address) {
+               spin_unlock_irqrestore(&domain->lock, flags);
                 goto out_free;
+       }
  
         iommu_completion_wait(iommu);
  
@@ -1761,7 +1883,6 @@ static void prealloc_protection_domains(void)
         struct pci_dev *dev = NULL;
         struct dma_ops_domain *dma_dom;
         struct amd_iommu *iommu;
-       int order = amd_iommu_aperture_order;
         u16 devid;
  
         while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
@@ -1774,7 +1895,7 @@ static void prealloc_protection_domains(void)
                 iommu = amd_iommu_rlookup_table[devid];
                 if (!iommu)
                         continue;
-               dma_dom = dma_ops_domain_alloc(iommu, order);
+               dma_dom = dma_ops_domain_alloc(iommu);
                 if (!dma_dom)
                         continue;
                 init_unity_mappings_for_device(dma_dom, devid);
@@ -1800,7 +1921,6 @@ static struct dma_map_ops amd_iommu_dma_ops = {
  int __init amd_iommu_init_dma_ops(void)
  {
         struct amd_iommu *iommu;
-       int order = amd_iommu_aperture_order;
         int ret;
  
         /*
@@ -1808,8 +1928,8 @@ int __init amd_iommu_init_dma_ops(void)
          * found in the system. Devices not assigned to any other
          * protection domain will be assigned to the default one.
          */
-       list_for_each_entry(iommu, &amd_iommu_list, list) {
-               iommu->default_dom = dma_ops_domain_alloc(iommu, order);
+       for_each_iommu(iommu) {
+               iommu->default_dom = dma_ops_domain_alloc(iommu);
                 if (iommu->default_dom == NULL)
                         return -ENOMEM;
                 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
@@ -1846,7 +1966,7 @@ int __init amd_iommu_init_dma_ops(void)
  
  free_domains:
  
-       list_for_each_entry(iommu, &amd_iommu_list, list) {
+       for_each_iommu(iommu) {
                 if (iommu->default_dom)
                         dma_ops_domain_free(iommu->default_dom);
         }
@@ -1978,7 +2098,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
  
         old_domain = domain_for_device(devid);
         if (old_domain)
-               return -EBUSY;
+               detach_device(old_domain, devid);
  
         attach_device(iommu, domain, devid);