X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=drivers%2Fpci%2Fintel-iommu.c;h=389fdd6f4a9f6afcdd0877150887dc1f1571a914;hb=a9b12619f7b6f19c871437ec24a088787a04b1de;hp=585e188c17463c128ec589ad0c198442a606ee39;hpb=f8bab73515ca5b392680bb033dceeb37b8463e95;p=safe%2Fjmp%2Flinux-2.6 diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 585e188..389fdd6 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -14,13 +14,15 @@ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple * Place - Suite 330, Boston, MA 02111-1307 USA. * - * Copyright (C) Ashok Raj - * Copyright (C) Shaohua Li - * Copyright (C) Anil S Keshavamurthy + * Copyright (C) 2006-2008 Intel Corporation + * Author: Ashok Raj + * Author: Shaohua Li + * Author: Anil S Keshavamurthy */ #include #include +#include #include #include #include @@ -30,11 +32,12 @@ #include #include #include +#include #include "iova.h" #include "intel-iommu.h" #include /* force_iommu in this header in x86-64*/ #include -#include +#include #include "pci.h" #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) @@ -46,15 +49,37 @@ #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 -#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */ - #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) + +static void flush_unmaps_timeout(unsigned long data); + +DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); + +#define HIGH_WATER_MARK 250 +struct deferred_flush_tables { + int next; + struct iova *iova[HIGH_WATER_MARK]; + struct dmar_domain *domain[HIGH_WATER_MARK]; +}; + +static struct deferred_flush_tables *deferred_flush; + +/* bitmap for indexing intel_iommus */ +static int g_num_of_iommus; + +static DEFINE_SPINLOCK(async_umap_flush_lock); +static LIST_HEAD(unmaps_to_do); + +static int timer_on; +static long list_size; + static void domain_remove_dev_info(struct dmar_domain *domain); -static int dmar_disabled; +int dmar_disabled; static int __initdata dmar_map_gfx = 1; static int dmar_forcedac; +static int intel_iommu_strict; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) static DEFINE_SPINLOCK(device_domain_lock); @@ -73,9 +98,13 @@ static int __init intel_iommu_setup(char *str) printk(KERN_INFO "Intel-IOMMU: disable GFX device mapping\n"); } else if (!strncmp(str, "forcedac", 8)) { - printk (KERN_INFO + printk(KERN_INFO "Intel-IOMMU: Forcing DAC for PCI devices\n"); dmar_forcedac = 1; + } else if (!strncmp(str, "strict", 6)) { + printk(KERN_INFO + "Intel-IOMMU: disable batched IOTLB flush\n"); + intel_iommu_strict = 1; } str += strcspn(str, ","); @@ -152,13 +181,6 @@ void free_iova_mem(struct iova *iova) kmem_cache_free(iommu_iova_cache, iova); } -static inline void __iommu_flush_cache( - struct intel_iommu *iommu, void *addr, int size) -{ - if (!ecap_coherent(iommu->ecap)) - clflush_cache_range(addr, size); -} - /* Gets context entry for a given bus and devfn */ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, u8 bus, u8 devfn) @@ -455,19 +477,6 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) return 0; } -#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ -{\ - unsigned long start_time = jiffies;\ - while (1) {\ - sts = op (iommu->reg + offset);\ - if (cond)\ - break;\ - if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\ - panic("DMAR hardware is malfunctioning\n");\ - cpu_relax();\ - }\ -} - static void iommu_set_root_entry(struct intel_iommu *iommu) { void *addr; @@ -745,7 +754,7 @@ static int iommu_disable_translation(struct intel_iommu *iommu) /* iommu interrupt handling. Most stuff are MSI-like. */ -static char *fault_reason_strings[] = +static const char *fault_reason_strings[] = { "Software", "Present bit in root entry is clear", @@ -760,14 +769,13 @@ static char *fault_reason_strings[] = "non-zero reserved fields in RTP", "non-zero reserved fields in CTP", "non-zero reserved fields in PTE", - "Unknown" }; #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1) -char *dmar_get_fault_reason(u8 fault_reason) +const char *dmar_get_fault_reason(u8 fault_reason) { - if (fault_reason >= MAX_FAULT_REASON_IDX) - return fault_reason_strings[MAX_FAULT_REASON_IDX - 1]; + if (fault_reason > MAX_FAULT_REASON_IDX) + return "Unknown"; else return fault_reason_strings[fault_reason]; } @@ -825,7 +833,7 @@ void dmar_msi_read(int irq, struct msi_msg *msg) static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type, u8 fault_reason, u16 source_id, u64 addr) { - char *reason; + const char *reason; reason = dmar_get_fault_reason(fault_reason); @@ -958,6 +966,8 @@ static int iommu_init_domains(struct intel_iommu *iommu) return -ENOMEM; } + spin_lock_init(&iommu->lock); + /* * if Caching mode is set, then invalid translations are tagged * with domainid 0. Hence we need to pre-allocate it. @@ -967,65 +977,14 @@ static int iommu_init_domains(struct intel_iommu *iommu) return 0; } -static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) -{ - struct intel_iommu *iommu; - int ret; - int map_size; - u32 ver; - - iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); - if (!iommu) - return NULL; - iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K); - if (!iommu->reg) { - printk(KERN_ERR "IOMMU: can't map the region\n"); - goto error; - } - iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); - iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); - - /* the registers might be more than one page */ - map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), - cap_max_fault_reg_offset(iommu->cap)); - map_size = PAGE_ALIGN_4K(map_size); - if (map_size > PAGE_SIZE_4K) { - iounmap(iommu->reg); - iommu->reg = ioremap(drhd->reg_base_addr, map_size); - if (!iommu->reg) { - printk(KERN_ERR "IOMMU: can't map the region\n"); - goto error; - } - } - - ver = readl(iommu->reg + DMAR_VER_REG); - pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n", - drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), - iommu->cap, iommu->ecap); - ret = iommu_init_domains(iommu); - if (ret) - goto error_unmap; - spin_lock_init(&iommu->lock); - spin_lock_init(&iommu->register_lock); - - drhd->iommu = iommu; - return iommu; -error_unmap: - iounmap(iommu->reg); -error: - kfree(iommu); - return NULL; -} static void domain_exit(struct dmar_domain *domain); -static void free_iommu(struct intel_iommu *iommu) + +void free_dmar_iommu(struct intel_iommu *iommu) { struct dmar_domain *domain; int i; - if (!iommu) - return; - i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); for (; i < cap_ndoms(iommu->cap); ) { domain = iommu->domains[i]; @@ -1050,10 +1009,6 @@ static void free_iommu(struct intel_iommu *iommu) /* free context mapping */ free_context_table(iommu); - - if (iommu->reg) - iounmap(iommu->reg); - kfree(iommu); } static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) @@ -1097,6 +1052,8 @@ static void iommu_free_domain(struct dmar_domain *domain) } static struct iova_domain reserved_iova_list; +static struct lock_class_key reserved_alloc_key; +static struct lock_class_key reserved_rbtree_key; static void dmar_init_reserved_ranges(void) { @@ -1107,6 +1064,11 @@ static void dmar_init_reserved_ranges(void) init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN); + lockdep_set_class(&reserved_iova_list.iova_alloc_lock, + &reserved_alloc_key); + lockdep_set_class(&reserved_iova_list.iova_rbtree_lock, + &reserved_rbtree_key); + /* IOAPIC ranges shouldn't be accessed by DMA */ iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START), IOVA_PFN(IOAPIC_RANGE_END)); @@ -1391,37 +1353,6 @@ find_domain(struct pci_dev *pdev) return NULL; } -static int dmar_pci_device_match(struct pci_dev *devices[], int cnt, - struct pci_dev *dev) -{ - int index; - - while (dev) { - for (index = 0; index < cnt; index ++) - if (dev == devices[index]) - return 1; - - /* Check our parent */ - dev = dev->bus->self; - } - - return 0; -} - -static struct dmar_drhd_unit * -dmar_find_matched_drhd_unit(struct pci_dev *dev) -{ - struct dmar_drhd_unit *drhd = NULL; - - list_for_each_entry(drhd, &dmar_drhd_units, list) { - if (drhd->include_all || dmar_pci_device_match(drhd->devices, - drhd->devices_cnt, dev)) - return drhd; - } - - return NULL; -} - /* domain is initialized */ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) { @@ -1602,12 +1533,43 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, } #ifdef CONFIG_DMAR_GFX_WA -extern int arch_get_ram_range(int slot, u64 *addr, u64 *size); +struct iommu_prepare_data { + struct pci_dev *pdev; + int ret; +}; + +static int __init iommu_prepare_work_fn(unsigned long start_pfn, + unsigned long end_pfn, void *datax) +{ + struct iommu_prepare_data *data; + + data = (struct iommu_prepare_data *)datax; + + data->ret = iommu_prepare_identity_map(data->pdev, + start_pfn<ret; + +} + +static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev) +{ + int nid; + struct iommu_prepare_data data; + + data.pdev = pdev; + data.ret = 0; + + for_each_online_node(nid) { + work_with_active_regions(nid, iommu_prepare_work_fn, &data); + if (data.ret) + return data.ret; + } + return data.ret; +} + static void __init iommu_prepare_gfx_mapping(void) { struct pci_dev *pdev = NULL; - u64 base, size; - int slot; int ret; for_each_pci_dev(pdev) { @@ -1616,17 +1578,9 @@ static void __init iommu_prepare_gfx_mapping(void) continue; printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n", pci_name(pdev)); - slot = arch_get_ram_range(0, &base, &size); - while (slot >= 0) { - ret = iommu_prepare_identity_map(pdev, - base, base + size); - if (ret) - goto error; - slot = arch_get_ram_range(slot, &base, &size); - } - continue; -error: - printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); + ret = iommu_prepare_with_active_regions(pdev); + if (ret) + printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); } } #endif @@ -1662,7 +1616,7 @@ int __init init_dmars(void) struct dmar_rmrr_unit *rmrr; struct pci_dev *pdev; struct intel_iommu *iommu; - int ret, unit = 0; + int i, ret, unit = 0; /* * for each drhd @@ -1671,13 +1625,30 @@ int __init init_dmars(void) * endfor */ for_each_drhd_unit(drhd) { + g_num_of_iommus++; + /* + * lock not needed as this is only incremented in the single + * threaded kernel __init code path all other access are read + * only + */ + } + + deferred_flush = kzalloc(g_num_of_iommus * + sizeof(struct deferred_flush_tables), GFP_KERNEL); + if (!deferred_flush) { + ret = -ENOMEM; + goto error; + } + + for_each_drhd_unit(drhd) { if (drhd->ignored) continue; - iommu = alloc_iommu(drhd); - if (!iommu) { - ret = -ENOMEM; + + iommu = drhd->iommu; + + ret = iommu_init_domains(iommu); + if (ret) goto error; - } /* * TBD: @@ -1706,7 +1677,6 @@ int __init init_dmars(void) * endfor */ for_each_rmrr_units(rmrr) { - int i; for (i = 0; i < rmrr->devices_cnt; i++) { pdev = rmrr->devices[i]; /* some BIOS lists non-exist devices in DMAR table */ @@ -1843,32 +1813,31 @@ get_valid_domain_for_dev(struct pci_dev *pdev) return domain; } -static dma_addr_t intel_map_single(struct device *hwdev, void *addr, - size_t size, int dir) +static dma_addr_t +intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir) { struct pci_dev *pdev = to_pci_dev(hwdev); - int ret; struct dmar_domain *domain; - unsigned long start_addr; + unsigned long start_paddr; struct iova *iova; int prot = 0; + int ret; BUG_ON(dir == DMA_NONE); if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) - return virt_to_bus(addr); + return paddr; domain = get_valid_domain_for_dev(pdev); if (!domain) return 0; - addr = (void *)virt_to_phys(addr); - size = aligned_size((u64)addr, size); + size = aligned_size((u64)paddr, size); iova = __intel_alloc_iova(hwdev, domain, size); if (!iova) goto error; - start_addr = iova->pfn_lo << PAGE_SHIFT_4K; + start_paddr = iova->pfn_lo << PAGE_SHIFT_4K; /* * Check if DMAR supports zero-length reads on write only @@ -1880,36 +1849,93 @@ static dma_addr_t intel_map_single(struct device *hwdev, void *addr, if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) prot |= DMA_PTE_WRITE; /* - * addr - (addr + size) might be partial page, we should map the whole + * paddr - (paddr + size) might be partial page, we should map the whole * page. Note: if two part of one page are separately mapped, we - * might have two guest_addr mapping to the same host addr, but this + * might have two guest_addr mapping to the same host paddr, but this * is not a big problem */ - ret = domain_page_mapping(domain, start_addr, - ((u64)addr) & PAGE_MASK_4K, size, prot); + ret = domain_page_mapping(domain, start_paddr, + ((u64)paddr) & PAGE_MASK_4K, size, prot); if (ret) goto error; pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n", - pci_name(pdev), size, (u64)addr, - size, (u64)start_addr, dir); + pci_name(pdev), size, (u64)paddr, + size, (u64)start_paddr, dir); /* it's a non-present to present mapping */ ret = iommu_flush_iotlb_psi(domain->iommu, domain->id, - start_addr, size >> PAGE_SHIFT_4K, 1); + start_paddr, size >> PAGE_SHIFT_4K, 1); if (ret) iommu_flush_write_buffer(domain->iommu); - return (start_addr + ((u64)addr & (~PAGE_MASK_4K))); + return (start_paddr + ((u64)paddr & (~PAGE_MASK_4K))); error: if (iova) __free_iova(&domain->iovad, iova); printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n", - pci_name(pdev), size, (u64)addr, dir); + pci_name(pdev), size, (u64)paddr, dir); return 0; } +static void flush_unmaps(void) +{ + int i, j; + + timer_on = 0; + + /* just flush them all */ + for (i = 0; i < g_num_of_iommus; i++) { + if (deferred_flush[i].next) { + struct intel_iommu *iommu = + deferred_flush[i].domain[0]->iommu; + + iommu_flush_iotlb_global(iommu, 0); + for (j = 0; j < deferred_flush[i].next; j++) { + __free_iova(&deferred_flush[i].domain[j]->iovad, + deferred_flush[i].iova[j]); + } + deferred_flush[i].next = 0; + } + } + + list_size = 0; +} + +static void flush_unmaps_timeout(unsigned long data) +{ + unsigned long flags; + + spin_lock_irqsave(&async_umap_flush_lock, flags); + flush_unmaps(); + spin_unlock_irqrestore(&async_umap_flush_lock, flags); +} + +static void add_unmap(struct dmar_domain *dom, struct iova *iova) +{ + unsigned long flags; + int next, iommu_id; + + spin_lock_irqsave(&async_umap_flush_lock, flags); + if (list_size == HIGH_WATER_MARK) + flush_unmaps(); + + iommu_id = dom->iommu->seq_id; + + next = deferred_flush[iommu_id].next; + deferred_flush[iommu_id].domain[next] = dom; + deferred_flush[iommu_id].iova[next] = iova; + deferred_flush[iommu_id].next++; + + if (!timer_on) { + mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10)); + timer_on = 1; + } + list_size++; + spin_unlock_irqrestore(&async_umap_flush_lock, flags); +} + static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size, int dir) { @@ -1937,13 +1963,19 @@ static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, dma_pte_clear_range(domain, start_addr, start_addr + size); /* free page tables */ dma_pte_free_pagetable(domain, start_addr, start_addr + size); - - if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr, - size >> PAGE_SHIFT_4K, 0)) - iommu_flush_write_buffer(domain->iommu); - - /* free iova */ - __free_iova(&domain->iovad, iova); + if (intel_iommu_strict) { + if (iommu_flush_iotlb_psi(domain->iommu, + domain->id, start_addr, size >> PAGE_SHIFT_4K, 0)) + iommu_flush_write_buffer(domain->iommu); + /* free iova */ + __free_iova(&domain->iovad, iova); + } else { + add_unmap(domain, iova); + /* + * queue up the release of the unmap to save the 1/6th of the + * cpu used up by the iotlb flush operation... + */ + } } static void * intel_alloc_coherent(struct device *hwdev, size_t size, @@ -1961,7 +1993,7 @@ static void * intel_alloc_coherent(struct device *hwdev, size_t size, return NULL; memset(vaddr, 0, size); - *dma_handle = intel_map_single(hwdev, vaddr, size, DMA_BIDIRECTIONAL); + *dma_handle = intel_map_single(hwdev, virt_to_bus(vaddr), size, DMA_BIDIRECTIONAL); if (*dma_handle) return vaddr; free_pages((unsigned long)vaddr, order); @@ -2206,15 +2238,6 @@ static void __init iommu_exit_mempool(void) } -void __init detect_intel_iommu(void) -{ - if (swiotlb || no_iommu || iommu_detected || dmar_disabled) - return; - if (early_dmar_detect()) { - iommu_detected = 1; - } -} - static void __init init_no_remapping_devices(void) { struct dmar_drhd_unit *drhd; @@ -2261,12 +2284,19 @@ int __init intel_iommu_init(void) { int ret = 0; - if (no_iommu || swiotlb || dmar_disabled) - return -ENODEV; - if (dmar_table_init()) return -ENODEV; + if (dmar_dev_scope_init()) + return -ENODEV; + + /* + * Check the need for DMA-remapping initialization now. + * Above initialization will also be used by Interrupt-remapping. + */ + if (no_iommu || swiotlb || dmar_disabled) + return -ENODEV; + iommu_init_mempool(); dmar_init_reserved_ranges(); @@ -2282,6 +2312,7 @@ int __init intel_iommu_init(void) printk(KERN_INFO "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n"); + init_timer(&unmap_timer); force_iommu = 1; dma_ops = &intel_dma_ops; return 0;