x86/amd-iommu: Dump fault entry on DTE error
[safe/jmp/linux-2.6] / arch / x86 / kernel / amd_iommu.c
index 8ff02ee..364c6de 100644 (file)
@@ -58,6 +58,13 @@ static struct dma_ops_domain *find_protection_domain(u16 devid);
 static u64* alloc_pte(struct protection_domain *dom,
                      unsigned long address, u64
                      **pte_page, gfp_t gfp);
+static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
+                                     unsigned long start_page,
+                                     unsigned int pages);
+/* Define BUS_NOTIFY_UNBOUND_DRIVER as 0x0005 unless it is already defined. */
+#ifndef BUS_NOTIFY_UNBOUND_DRIVER
+#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005
+#endif
 
 #ifdef CONFIG_AMD_IOMMU_STATS
 
@@ -131,6 +138,15 @@ static int iommu_has_npcache(struct amd_iommu *iommu)
  *
  ****************************************************************************/
 
+static void dump_dte_entry(u16 devid)
+{
+       int i;
+       /* Print data[0] through data[7] of devid's device table entry. */
+       for (i = 0; i < 8; ++i)
+               pr_err("AMD-Vi: DTE[%d]: %08x\n", i,
+                       amd_iommu_dev_table[devid].data[i]);
+}
+
 static void iommu_print_event(void *__evt)
 {
        u32 *event = __evt;
@@ -148,6 +164,7 @@ static void iommu_print_event(void *__evt)
                       "address=0x%016llx flags=0x%04x]\n",
                       PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
                       address, flags);
+               dump_dte_entry(devid);
                break;
        case EVENT_TYPE_IO_FAULT:
                printk("IO_PAGE_FAULT device=%02x:%02x.%x "
@@ -215,7 +232,7 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
 {
        struct amd_iommu *iommu;
 
-       list_for_each_entry(iommu, &amd_iommu_list, list)
+       for_each_iommu(iommu)
                iommu_poll_events(iommu);
 
        return IRQ_HANDLED;
@@ -427,6 +444,16 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
        iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
 }
 
+/* Flush the whole IO/TLB for a given protection domain - including PDE */
+static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid)
+{
+       u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
+       /* Bump the domain_flush_single counter (via INC_STATS_COUNTER). */
+       INC_STATS_COUNTER(domain_flush_single);
+       /* 4th argument is 1 here, 0 in iommu_flush_tlb(); presumably the PDE flag. */
+       iommu_queue_inv_iommu_pages(iommu, address, domid, 1, 1);
+}
+
 /*
  * This function is used to flush the IO/TLB for a given protection domain
  * on every IOMMU in the system
@@ -442,7 +469,7 @@ static void iommu_flush_domain(u16 domid)
        __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
                                      domid, 1, 1);
 
-       list_for_each_entry(iommu, &amd_iommu_list, list) {
+       for_each_iommu(iommu) {
                spin_lock_irqsave(&iommu->lock, flags);
                __iommu_queue_command(iommu, &cmd);
                __iommu_completion_wait(iommu);
@@ -451,6 +478,35 @@ static void iommu_flush_domain(u16 domid)
        }
 }
 
+void amd_iommu_flush_all_domains(void)
+{
+       int i;
+       /* Flush each domain id in 1..MAX_DOMAIN_ID-1 set in the alloc bitmap. */
+       for (i = 1; i < MAX_DOMAIN_ID; ++i) {
+               if (!test_bit(i, amd_iommu_pd_alloc_bitmap))
+                       continue;
+               iommu_flush_domain(i);
+       }
+}
+
+void amd_iommu_flush_all_devices(void)
+{
+       struct amd_iommu *iommu;
+       int i;
+       /* Visit every device id from 0 through amd_iommu_last_bdf. */
+       for (i = 0; i <= amd_iommu_last_bdf; ++i) {
+               if (amd_iommu_pd_table[i] == NULL)
+                       continue;
+               /* Ids with no IOMMU in the rlookup table are skipped too. */
+               iommu = amd_iommu_rlookup_table[i];
+               if (!iommu)
+                       continue;
+               /* Queue a device-entry invalidation, then a completion wait. */
+               iommu_queue_inv_dev_entry(iommu, i);
+               iommu_completion_wait(iommu);
+       }
+}
+
 /****************************************************************************
  *
  * The functions below are used the create the page table mappings for
@@ -621,14 +677,46 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
  */
 
 /*
+ * Walk the L2, L1 and L0 page-table levels of a domain for a dma address.
+ * Returns a pointer to the L0 entry, or NULL if the L2 or L1 entry is absent.
+ */
+static u64* fetch_pte(struct protection_domain *domain,
+                     unsigned long address)
+{
+       u64 *pte;
+
+       pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)];
+
+       if (!IOMMU_PTE_PRESENT(*pte))
+               return NULL;
+
+       pte = IOMMU_PTE_PAGE(*pte);
+       pte = &pte[IOMMU_PTE_L1_INDEX(address)];
+
+       if (!IOMMU_PTE_PRESENT(*pte))
+               return NULL;
+       /* The L0 entry is not tested for presence here; the caller must do it. */
+       pte = IOMMU_PTE_PAGE(*pte);
+       pte = &pte[IOMMU_PTE_L0_INDEX(address)];
+
+       return pte;
+}
+
+/*
  * This function is used to add a new aperture range to an existing
  * aperture in case of dma_ops domain allocation or address allocation
  * failure.
  */
-static int alloc_new_range(struct dma_ops_domain *dma_dom,
+static int alloc_new_range(struct amd_iommu *iommu,
+                          struct dma_ops_domain *dma_dom,
                           bool populate, gfp_t gfp)
 {
        int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
+       int i;
+
+#ifdef CONFIG_IOMMU_STRESS
+       populate = false;
+#endif
 
        if (index >= APERTURE_MAX_RANGES)
                return -ENOMEM;
@@ -662,6 +750,33 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
 
        dma_dom->aperture_size += APERTURE_RANGE_SIZE;
 
+       /* Initialize the exclusion range if necessary */
+       if (iommu->exclusion_start &&
+           iommu->exclusion_start >= dma_dom->aperture[index]->offset &&
+           iommu->exclusion_start < dma_dom->aperture_size) {
+               unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
+               int pages = iommu_num_pages(iommu->exclusion_start,
+                                           iommu->exclusion_length,
+                                           PAGE_SIZE);
+               dma_ops_reserve_addresses(dma_dom, startpage, pages);
+       }
+
+       /*
+        * Check for areas already mapped as present in the new aperture
+        * range and mark those pages as reserved in the allocator. Such
+        * mappings may already exist as a result of requested unity
+        * mappings for devices.
+        */
+       for (i = dma_dom->aperture[index]->offset;
+            i < dma_dom->aperture_size;
+            i += PAGE_SIZE) {
+               u64 *pte = fetch_pte(&dma_dom->domain, i);
+               if (!pte || !IOMMU_PTE_PRESENT(*pte))
+                       continue;
+               /* i is a byte address; the allocator takes a page number */
+               dma_ops_reserve_addresses(dma_dom, i >> PAGE_SHIFT, 1);
+       }
+
        return 0;
 
 out_free:
@@ -725,6 +840,11 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
 {
        unsigned long address;
 
+#ifdef CONFIG_IOMMU_STRESS
+       dom->next_address = 0;
+       dom->need_flush = true;
+#endif
+
        address = dma_ops_area_alloc(dev, dom, pages, align_mask,
                                     dma_mask, dom->next_address);
 
@@ -757,6 +877,11 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 
        BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
 
+#ifdef CONFIG_IOMMU_STRESS
+       if (i < 4)
+               return;
+#endif
+
        if (address >= dom->next_address)
                dom->need_flush = true;
 
@@ -881,17 +1006,10 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
  * It also intializes the page table and the address allocator data
  * structures required for the dma_ops interface
  */
-static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
-                                                  unsigned order)
+static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
 {
        struct dma_ops_domain *dma_dom;
 
-       /*
-        * Currently the DMA aperture must be between 32 MB and 1GB in size
-        */
-       if ((order < 25) || (order > 30))
-               return NULL;
-
        dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
        if (!dma_dom)
                return NULL;
@@ -911,7 +1029,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
        dma_dom->need_flush = false;
        dma_dom->target_dev = 0xffff;
 
-       if (alloc_new_range(dma_dom, true, GFP_KERNEL))
+       if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL))
                goto free_dma_dom;
 
        /*
@@ -921,15 +1039,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
        dma_dom->aperture[0]->bitmap[0] = 1;
        dma_dom->next_address = 0;
 
-       /* Intialize the exclusion range if necessary */
-       if (iommu->exclusion_start &&
-           iommu->exclusion_start < dma_dom->aperture_size) {
-               unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
-               int pages = iommu_num_pages(iommu->exclusion_start,
-                                           iommu->exclusion_length,
-                                           PAGE_SIZE);
-               dma_ops_reserve_addresses(dma_dom, startpage, pages);
-       }
 
        return dma_dom;
 
@@ -989,7 +1098,13 @@ static void attach_device(struct amd_iommu *iommu,
        amd_iommu_pd_table[devid] = domain;
        write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 
+       /*
+        * We might boot into a crash-kernel here. The crashed kernel
+        * left the caches in the IOMMU dirty. So we have to flush
+        * here to evict all dirty stuff.
+        */
        iommu_queue_inv_dev_entry(iommu, devid);
+       iommu_flush_tlb_pde(iommu, domain->id);
 }
 
 /*
@@ -1038,7 +1153,6 @@ static int device_change_notifier(struct notifier_block *nb,
        struct protection_domain *domain;
        struct dma_ops_domain *dma_domain;
        struct amd_iommu *iommu;
-       int order = amd_iommu_aperture_order;
        unsigned long flags;
 
        if (devid > amd_iommu_last_bdf)
@@ -1057,17 +1171,7 @@ static int device_change_notifier(struct notifier_block *nb,
                          "to a non-dma-ops domain\n", dev_name(dev));
 
        switch (action) {
-       case BUS_NOTIFY_BOUND_DRIVER:
-               if (domain)
-                       goto out;
-               dma_domain = find_protection_domain(devid);
-               if (!dma_domain)
-                       dma_domain = iommu->default_dom;
-               attach_device(iommu, &dma_domain->domain, devid);
-               printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
-                      "device %s\n", dma_domain->domain.id, dev_name(dev));
-               break;
-       case BUS_NOTIFY_UNBIND_DRIVER:
+       case BUS_NOTIFY_UNBOUND_DRIVER:
                if (!domain)
                        goto out;
                detach_device(domain, devid);
@@ -1077,7 +1181,7 @@ static int device_change_notifier(struct notifier_block *nb,
                dma_domain = find_protection_domain(devid);
                if (dma_domain)
                        goto out;
-               dma_domain = dma_ops_domain_alloc(iommu, order);
+               dma_domain = dma_ops_domain_alloc(iommu);
                if (!dma_domain)
                        goto out;
                dma_domain->target_dev = devid;
@@ -1098,7 +1202,7 @@ out:
        return 0;
 }
 
-struct notifier_block device_nb = {
+static struct notifier_block device_nb = {
        .notifier_call = device_change_notifier,
 };
 
@@ -1188,8 +1292,8 @@ static int get_device_resources(struct device *dev,
                        dma_dom = (*iommu)->default_dom;
                *domain = &dma_dom->domain;
                attach_device(*iommu, *domain, *bdf);
-               printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
-                               "device %s\n", (*domain)->id, dev_name(dev));
+               DUMP_printk("Using protection domain %d for device %s\n",
+                           (*domain)->id, dev_name(dev));
        }
 
        if (domain_for_device(_bdf) == NULL)
@@ -1354,10 +1458,26 @@ static dma_addr_t __map_single(struct device *dev,
        if (align)
                align_mask = (1UL << get_order(size)) - 1;
 
+retry:
        address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
                                          dma_mask);
-       if (unlikely(address == bad_dma_address))
-               goto out;
+       if (unlikely(address == bad_dma_address)) {
+               /*
+                * setting next_address here will let the address
+                * allocator only scan the new allocated range in the
+                * first run. This is a small optimization.
+                */
+               dma_dom->next_address = dma_dom->aperture_size;
+
+               if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC))
+                       goto out;
+
+               /*
+                * aperture was successfully enlarged by 128 MB, try
+                * allocation again
+                */
+               goto retry;
+       }
 
        start = address;
        for (i = 0; i < pages; ++i) {
@@ -1653,7 +1773,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
        flag |= __GFP_ZERO;
        virt_addr = (void *)__get_free_pages(flag, get_order(size));
        if (!virt_addr)
-               return 0;
+               return NULL;
 
        paddr = virt_to_phys(virt_addr);
 
@@ -1673,8 +1793,10 @@ static void *alloc_coherent(struct device *dev, size_t size,
        *dma_addr = __map_single(dev, iommu, domain->priv, paddr,
                                 size, DMA_BIDIRECTIONAL, true, dma_mask);
 
-       if (*dma_addr == bad_dma_address)
+       if (*dma_addr == bad_dma_address) {
+               spin_unlock_irqrestore(&domain->lock, flags);
                goto out_free;
+       }
 
        iommu_completion_wait(iommu);
 
@@ -1761,7 +1883,6 @@ static void prealloc_protection_domains(void)
        struct pci_dev *dev = NULL;
        struct dma_ops_domain *dma_dom;
        struct amd_iommu *iommu;
-       int order = amd_iommu_aperture_order;
        u16 devid;
 
        while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
@@ -1774,7 +1895,7 @@ static void prealloc_protection_domains(void)
                iommu = amd_iommu_rlookup_table[devid];
                if (!iommu)
                        continue;
-               dma_dom = dma_ops_domain_alloc(iommu, order);
+               dma_dom = dma_ops_domain_alloc(iommu);
                if (!dma_dom)
                        continue;
                init_unity_mappings_for_device(dma_dom, devid);
@@ -1800,7 +1921,6 @@ static struct dma_map_ops amd_iommu_dma_ops = {
 int __init amd_iommu_init_dma_ops(void)
 {
        struct amd_iommu *iommu;
-       int order = amd_iommu_aperture_order;
        int ret;
 
        /*
@@ -1808,8 +1928,8 @@ int __init amd_iommu_init_dma_ops(void)
         * found in the system. Devices not assigned to any other
         * protection domain will be assigned to the default one.
         */
-       list_for_each_entry(iommu, &amd_iommu_list, list) {
-               iommu->default_dom = dma_ops_domain_alloc(iommu, order);
+       for_each_iommu(iommu) {
+               iommu->default_dom = dma_ops_domain_alloc(iommu);
                if (iommu->default_dom == NULL)
                        return -ENOMEM;
                iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
@@ -1846,7 +1966,7 @@ int __init amd_iommu_init_dma_ops(void)
 
 free_domains:
 
-       list_for_each_entry(iommu, &amd_iommu_list, list) {
+       for_each_iommu(iommu) {
                if (iommu->default_dom)
                        dma_ops_domain_free(iommu->default_dom);
        }
@@ -1978,7 +2098,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
 
        old_domain = domain_for_device(devid);
        if (old_domain)
-               return -EBUSY;
+               detach_device(old_domain, devid);
 
        attach_device(iommu, domain, devid);