Add domain flag DOMAIN_FLAG_VIRTUAL_MACHINE
drivers/pci/intel-iommu.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  * Author: Fenghua Yu <fenghua.yu@intel.com>
22  */
23
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/intel-iommu.h>
38 #include <asm/cacheflush.h>
39 #include <asm/iommu.h>
40 #include "pci.h"
41
42 #define ROOT_SIZE               VTD_PAGE_SIZE
43 #define CONTEXT_SIZE            VTD_PAGE_SIZE
44
45 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
46 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
47
48 #define IOAPIC_RANGE_START      (0xfee00000)
49 #define IOAPIC_RANGE_END        (0xfeefffff)
50 #define IOVA_START_ADDR         (0x1000)
51
52 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
53
54 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55
56 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
57 #define DMA_32BIT_PFN           IOVA_PFN(DMA_32BIT_MASK)
58 #define DMA_64BIT_PFN           IOVA_PFN(DMA_64BIT_MASK)
59
60 /* global iommu list, set NULL for ignored DMAR units */
61 static struct intel_iommu **g_iommus;
62
63 /*
64  * 0: Present
65  * 1-11: Reserved
66  * 12-63: Context Ptr (12 - (haw-1))
67  * 64-127: Reserved
68  */
69 struct root_entry {
70         u64     val;
71         u64     rsvd1;
72 };
73 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
74 static inline bool root_present(struct root_entry *root)
75 {
76         return (root->val & 1);
77 }
78 static inline void set_root_present(struct root_entry *root)
79 {
80         root->val |= 1;
81 }
82 static inline void set_root_value(struct root_entry *root, unsigned long value)
83 {
84         root->val |= value & VTD_PAGE_MASK;
85 }
86
87 static inline struct context_entry *
88 get_context_addr_from_root(struct root_entry *root)
89 {
90         return (struct context_entry *)
91                 (root_present(root)?phys_to_virt(
92                 root->val & VTD_PAGE_MASK) :
93                 NULL);
94 }
95
96 /*
97  * low 64 bits:
98  * 0: present
99  * 1: fault processing disable
100  * 2-3: translation type
101  * 12-63: address space root
102  * high 64 bits:
103  * 0-2: address width
104  * 3-6: aval
105  * 8-23: domain id
106  */
107 struct context_entry {
108         u64 lo;
109         u64 hi;
110 };
111
112 static inline bool context_present(struct context_entry *context)
113 {
114         return (context->lo & 1);
115 }
116 static inline void context_set_present(struct context_entry *context)
117 {
118         context->lo |= 1;
119 }
120
121 static inline void context_set_fault_enable(struct context_entry *context)
122 {
123         context->lo &= (((u64)-1) << 2) | 1;
124 }
125
126 #define CONTEXT_TT_MULTI_LEVEL 0
127
128 static inline void context_set_translation_type(struct context_entry *context,
129                                                 unsigned long value)
130 {
131         context->lo &= (((u64)-1) << 4) | 3;
132         context->lo |= (value & 3) << 2;
133 }
134
135 static inline void context_set_address_root(struct context_entry *context,
136                                             unsigned long value)
137 {
138         context->lo |= value & VTD_PAGE_MASK;
139 }
140
141 static inline void context_set_address_width(struct context_entry *context,
142                                              unsigned long value)
143 {
144         context->hi |= value & 7;
145 }
146
147 static inline void context_set_domain_id(struct context_entry *context,
148                                          unsigned long value)
149 {
150         context->hi |= (value & ((1 << 16) - 1)) << 8;
151 }
152
153 static inline void context_clear_entry(struct context_entry *context)
154 {
155         context->lo = 0;
156         context->hi = 0;
157 }
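/*
 * A worked example (the physical address is made up for illustration):
 * a context entry for domain id 1, agaw 1 (a 3-level page table) whose
 * root table sits at physical 0x12345000 is built by the helpers above
 * as
 *      lo = 0x12345000 | 1 = 0x0000000012345001  (address root | present)
 *      hi = (1 << 8)   | 1 = 0x0000000000000101  (domain id | addr width)
 */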
158
159 /*
160  * 0: readable
161  * 1: writable
162  * 2-6: reserved
163  * 7: super page
164  * 8-11: available
165  * 12-63: Host physcial address
166  */
167 struct dma_pte {
168         u64 val;
169 };
170
171 static inline void dma_clear_pte(struct dma_pte *pte)
172 {
173         pte->val = 0;
174 }
175
176 static inline void dma_set_pte_readable(struct dma_pte *pte)
177 {
178         pte->val |= DMA_PTE_READ;
179 }
180
181 static inline void dma_set_pte_writable(struct dma_pte *pte)
182 {
183         pte->val |= DMA_PTE_WRITE;
184 }
185
186 static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
187 {
188         pte->val = (pte->val & ~3) | (prot & 3);
189 }
190
191 static inline u64 dma_pte_addr(struct dma_pte *pte)
192 {
193         return (pte->val & VTD_PAGE_MASK);
194 }
195
196 static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
197 {
198         pte->val |= (addr & VTD_PAGE_MASK);
199 }
200
201 static inline bool dma_pte_present(struct dma_pte *pte)
202 {
203         return (pte->val & 3) != 0;
204 }
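/*
 * A worked example (the page frame is made up for illustration): a
 * last-level pte that maps host physical page 0xabcde000 read/write
 * ends up as
 *      pte->val = 0xabcde000 | DMA_PTE_READ | DMA_PTE_WRITE = 0xabcde003
 * which dma_pte_present() above reports as present.
 */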
205
206 /* devices under the same p2p bridge are owned in one domain */
207 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
208
209 /* domain represents a virtual machine; more than one device
210  * across iommus may be owned by one domain, e.g. a kvm guest.
211  */
212 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 1)
213
214 struct dmar_domain {
215         int     id;                     /* domain id */
216         unsigned long iommu_bmp;        /* bitmap of iommus this domain uses*/
217
218         struct list_head devices;       /* all devices' list */
219         struct iova_domain iovad;       /* iova's that belong to this domain */
220
221         struct dma_pte  *pgd;           /* virtual address */
222         spinlock_t      mapping_lock;   /* page table lock */
223         int             gaw;            /* max guest address width */
224
225         /* adjusted guest address width, 0 is level 2 30-bit */
226         int             agaw;
227
228         int             flags;          /* flags to find out type of domain */
229
230         int             iommu_coherency;/* indicate coherency of iommu access */
231 };
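/*
 * Note: in the native (non virtual machine) case a domain is attached
 * through exactly one iommu; iommu_alloc_domain() sets the bit for that
 * iommu's seq_id in iommu_bmp and domain_get_iommu() recovers it with
 * find_first_bit().
 */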
232
233 /* PCI domain-device relationship */
234 struct device_domain_info {
235         struct list_head link;  /* link to domain siblings */
236         struct list_head global; /* link to global list */
237         u8 bus;                 /* PCI bus number */
238         u8 devfn;               /* PCI devfn number */
239         struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
240         struct dmar_domain *domain; /* pointer to domain */
241 };
242
243 static void flush_unmaps_timeout(unsigned long data);
244
245 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
246
247 #define HIGH_WATER_MARK 250
248 struct deferred_flush_tables {
249         int next;
250         struct iova *iova[HIGH_WATER_MARK];
251         struct dmar_domain *domain[HIGH_WATER_MARK];
252 };
253
254 static struct deferred_flush_tables *deferred_flush;
255
256 /* bitmap for indexing intel_iommus */
257 static int g_num_of_iommus;
258
259 static DEFINE_SPINLOCK(async_umap_flush_lock);
260 static LIST_HEAD(unmaps_to_do);
261
262 static int timer_on;
263 static long list_size;
264
265 static void domain_remove_dev_info(struct dmar_domain *domain);
266
267 int dmar_disabled;
268 static int __initdata dmar_map_gfx = 1;
269 static int dmar_forcedac;
270 static int intel_iommu_strict;
271
272 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
273 static DEFINE_SPINLOCK(device_domain_lock);
274 static LIST_HEAD(device_domain_list);
275
276 static int __init intel_iommu_setup(char *str)
277 {
278         if (!str)
279                 return -EINVAL;
280         while (*str) {
281                 if (!strncmp(str, "off", 3)) {
282                         dmar_disabled = 1;
283                         printk(KERN_INFO"Intel-IOMMU: disabled\n");
284                 } else if (!strncmp(str, "igfx_off", 8)) {
285                         dmar_map_gfx = 0;
286                         printk(KERN_INFO
287                                 "Intel-IOMMU: disable GFX device mapping\n");
288                 } else if (!strncmp(str, "forcedac", 8)) {
289                         printk(KERN_INFO
290                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
291                         dmar_forcedac = 1;
292                 } else if (!strncmp(str, "strict", 6)) {
293                         printk(KERN_INFO
294                                 "Intel-IOMMU: disable batched IOTLB flush\n");
295                         intel_iommu_strict = 1;
296                 }
297
298                 str += strcspn(str, ",");
299                 while (*str == ',')
300                         str++;
301         }
302         return 0;
303 }
304 __setup("intel_iommu=", intel_iommu_setup);
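/*
 * For example, booting with "intel_iommu=igfx_off,strict" keeps DMAR
 * enabled but sets dmar_map_gfx = 0 and intel_iommu_strict = 1, while
 * "intel_iommu=off" sets dmar_disabled = 1 and disables the driver
 * entirely.
 */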
305
306 static struct kmem_cache *iommu_domain_cache;
307 static struct kmem_cache *iommu_devinfo_cache;
308 static struct kmem_cache *iommu_iova_cache;
309
310 static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
311 {
312         unsigned int flags;
313         void *vaddr;
314
315         /* trying to avoid low memory issues */
316         flags = current->flags & PF_MEMALLOC;
317         current->flags |= PF_MEMALLOC;
318         vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
319         current->flags &= (~PF_MEMALLOC | flags);
320         return vaddr;
321 }
322
323
324 static inline void *alloc_pgtable_page(void)
325 {
326         unsigned int flags;
327         void *vaddr;
328
329         /* trying to avoid low memory issues */
330         flags = current->flags & PF_MEMALLOC;
331         current->flags |= PF_MEMALLOC;
332         vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
333         current->flags &= (~PF_MEMALLOC | flags);
334         return vaddr;
335 }
336
337 static inline void free_pgtable_page(void *vaddr)
338 {
339         free_page((unsigned long)vaddr);
340 }
341
342 static inline void *alloc_domain_mem(void)
343 {
344         return iommu_kmem_cache_alloc(iommu_domain_cache);
345 }
346
347 static void free_domain_mem(void *vaddr)
348 {
349         kmem_cache_free(iommu_domain_cache, vaddr);
350 }
351
352 static inline void * alloc_devinfo_mem(void)
353 {
354         return iommu_kmem_cache_alloc(iommu_devinfo_cache);
355 }
356
357 static inline void free_devinfo_mem(void *vaddr)
358 {
359         kmem_cache_free(iommu_devinfo_cache, vaddr);
360 }
361
362 struct iova *alloc_iova_mem(void)
363 {
364         return iommu_kmem_cache_alloc(iommu_iova_cache);
365 }
366
367 void free_iova_mem(struct iova *iova)
368 {
369         kmem_cache_free(iommu_iova_cache, iova);
370 }
371
372
373 static inline int width_to_agaw(int width);
374
375 /* calculate agaw for each iommu.
376  * "SAGAW" may be different across iommus; use a default agaw, and
377  * fall back to a smaller supported agaw for iommus that don't support it.
378  */
379 int iommu_calculate_agaw(struct intel_iommu *iommu)
380 {
381         unsigned long sagaw;
382         int agaw = -1;
383
384         sagaw = cap_sagaw(iommu->cap);
385         for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
386              agaw >= 0; agaw--) {
387                 if (test_bit(agaw, &sagaw))
388                         break;
389         }
390
391         return agaw;
392 }
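/*
 * A worked example: DEFAULT_DOMAIN_ADDRESS_WIDTH is 48, so the loop
 * starts at width_to_agaw(48) == 2 (a 4-level page table).  An iommu
 * whose SAGAW field only advertises 3-level support (bit 1) gets
 * agaw 1 instead; -1 means no supported agaw was found at all.
 */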
393
394 /* in native case, each domain is related to only one iommu */
395 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
396 {
397         int iommu_id;
398
399         BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
400
401         iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
402         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
403                 return NULL;
404
405         return g_iommus[iommu_id];
406 }
407
408 /* "Coherency" capability may be different across iommus */
409 static void domain_update_iommu_coherency(struct dmar_domain *domain)
410 {
411         int i;
412
413         domain->iommu_coherency = 1;
414
415         i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
416         for (; i < g_num_of_iommus; ) {
417                 if (!ecap_coherent(g_iommus[i]->ecap)) {
418                         domain->iommu_coherency = 0;
419                         break;
420                 }
421                 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
422         }
423 }
424
425 /* Gets context entry for a given bus and devfn */
426 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
427                 u8 bus, u8 devfn)
428 {
429         struct root_entry *root;
430         struct context_entry *context;
431         unsigned long phy_addr;
432         unsigned long flags;
433
434         spin_lock_irqsave(&iommu->lock, flags);
435         root = &iommu->root_entry[bus];
436         context = get_context_addr_from_root(root);
437         if (!context) {
438                 context = (struct context_entry *)alloc_pgtable_page();
439                 if (!context) {
440                         spin_unlock_irqrestore(&iommu->lock, flags);
441                         return NULL;
442                 }
443                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
444                 phy_addr = virt_to_phys((void *)context);
445                 set_root_value(root, phy_addr);
446                 set_root_present(root);
447                 __iommu_flush_cache(iommu, root, sizeof(*root));
448         }
449         spin_unlock_irqrestore(&iommu->lock, flags);
450         return &context[devfn];
451 }
452
453 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
454 {
455         struct root_entry *root;
456         struct context_entry *context;
457         int ret;
458         unsigned long flags;
459
460         spin_lock_irqsave(&iommu->lock, flags);
461         root = &iommu->root_entry[bus];
462         context = get_context_addr_from_root(root);
463         if (!context) {
464                 ret = 0;
465                 goto out;
466         }
467         ret = context_present(&context[devfn]);
468 out:
469         spin_unlock_irqrestore(&iommu->lock, flags);
470         return ret;
471 }
472
473 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
474 {
475         struct root_entry *root;
476         struct context_entry *context;
477         unsigned long flags;
478
479         spin_lock_irqsave(&iommu->lock, flags);
480         root = &iommu->root_entry[bus];
481         context = get_context_addr_from_root(root);
482         if (context) {
483                 context_clear_entry(&context[devfn]);
484                 __iommu_flush_cache(iommu, &context[devfn],
485                         sizeof(*context));
486         }
487         spin_unlock_irqrestore(&iommu->lock, flags);
488 }
489
490 static void free_context_table(struct intel_iommu *iommu)
491 {
492         struct root_entry *root;
493         int i;
494         unsigned long flags;
495         struct context_entry *context;
496
497         spin_lock_irqsave(&iommu->lock, flags);
498         if (!iommu->root_entry) {
499                 goto out;
500         }
501         for (i = 0; i < ROOT_ENTRY_NR; i++) {
502                 root = &iommu->root_entry[i];
503                 context = get_context_addr_from_root(root);
504                 if (context)
505                         free_pgtable_page(context);
506         }
507         free_pgtable_page(iommu->root_entry);
508         iommu->root_entry = NULL;
509 out:
510         spin_unlock_irqrestore(&iommu->lock, flags);
511 }
512
513 /* page table handling */
514 #define LEVEL_STRIDE            (9)
515 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
516
517 static inline int agaw_to_level(int agaw)
518 {
519         return agaw + 2;
520 }
521
522 static inline int agaw_to_width(int agaw)
523 {
524         return 30 + agaw * LEVEL_STRIDE;
525
526 }
527
528 static inline int width_to_agaw(int width)
529 {
530         return (width - 30) / LEVEL_STRIDE;
531 }
532
533 static inline unsigned int level_to_offset_bits(int level)
534 {
535         return (12 + (level - 1) * LEVEL_STRIDE);
536 }
537
538 static inline int address_level_offset(u64 addr, int level)
539 {
540         return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
541 }
542
543 static inline u64 level_mask(int level)
544 {
545         return ((u64)-1 << level_to_offset_bits(level));
546 }
547
548 static inline u64 level_size(int level)
549 {
550         return ((u64)1 << level_to_offset_bits(level));
551 }
552
553 static inline u64 align_to_level(u64 addr, int level)
554 {
555         return ((addr + level_size(level) - 1) & level_mask(level));
556 }
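/*
 * With LEVEL_STRIDE == 9 every table level indexes 512 entries, so one
 * entry covers level_size(level) == 1 << (12 + (level - 1) * 9) bytes:
 * 4KB at level 1, 2MB at level 2, 1GB at level 3 and 512GB at level 4.
 * align_to_level() rounds an address up to the next such boundary.
 */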
557
558 static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
559 {
560         int addr_width = agaw_to_width(domain->agaw);
561         struct dma_pte *parent, *pte = NULL;
562         int level = agaw_to_level(domain->agaw);
563         int offset;
564         unsigned long flags;
565         struct intel_iommu *iommu = domain_get_iommu(domain);
566
567         BUG_ON(!domain->pgd);
568
569         addr &= (((u64)1) << addr_width) - 1;
570         parent = domain->pgd;
571
572         spin_lock_irqsave(&domain->mapping_lock, flags);
573         while (level > 0) {
574                 void *tmp_page;
575
576                 offset = address_level_offset(addr, level);
577                 pte = &parent[offset];
578                 if (level == 1)
579                         break;
580
581                 if (!dma_pte_present(pte)) {
582                         tmp_page = alloc_pgtable_page();
583
584                         if (!tmp_page) {
585                                 spin_unlock_irqrestore(&domain->mapping_lock,
586                                         flags);
587                                 return NULL;
588                         }
589                         __iommu_flush_cache(iommu, tmp_page,
590                                         PAGE_SIZE);
591                         dma_set_pte_addr(pte, virt_to_phys(tmp_page));
592                         /*
593                          * higher level tables always set r/w; the last level
594                          * page table controls read/write
595                          */
596                         dma_set_pte_readable(pte);
597                         dma_set_pte_writable(pte);
598                         __iommu_flush_cache(iommu, pte, sizeof(*pte));
599                 }
600                 parent = phys_to_virt(dma_pte_addr(pte));
601                 level--;
602         }
603
604         spin_unlock_irqrestore(&domain->mapping_lock, flags);
605         return pte;
606 }
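/*
 * A walk-through (the address is made up for illustration): with a
 * 4-level table (agaw 2), addr 0x0000001234567000 is looked up with the
 * 9-bit indexes (addr >> 39) & 0x1ff, (addr >> 30) & 0x1ff,
 * (addr >> 21) & 0x1ff and (addr >> 12) & 0x1ff, allocating and
 * cache-flushing intermediate tables on the way down, and the returned
 * pte is the last-level entry for that 4KB page.
 */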
607
608 /* return address's pte at specific level */
609 static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
610                 int level)
611 {
612         struct dma_pte *parent, *pte = NULL;
613         int total = agaw_to_level(domain->agaw);
614         int offset;
615
616         parent = domain->pgd;
617         while (level <= total) {
618                 offset = address_level_offset(addr, total);
619                 pte = &parent[offset];
620                 if (level == total)
621                         return pte;
622
623                 if (!dma_pte_present(pte))
624                         break;
625                 parent = phys_to_virt(dma_pte_addr(pte));
626                 total--;
627         }
628         return NULL;
629 }
630
631 /* clear one page's page table */
632 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
633 {
634         struct dma_pte *pte = NULL;
635         struct intel_iommu *iommu = domain_get_iommu(domain);
636
637         /* get last level pte */
638         pte = dma_addr_level_pte(domain, addr, 1);
639
640         if (pte) {
641                 dma_clear_pte(pte);
642                 __iommu_flush_cache(iommu, pte, sizeof(*pte));
643         }
644 }
645
646 /* clear last level pte; a tlb flush should follow */
647 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
648 {
649         int addr_width = agaw_to_width(domain->agaw);
650
651         start &= (((u64)1) << addr_width) - 1;
652         end &= (((u64)1) << addr_width) - 1;
653         /* in case it's a partial page */
654         start = PAGE_ALIGN(start);
655         end &= PAGE_MASK;
656
657         /* we don't need lock here, nobody else touches the iova range */
658         while (start < end) {
659                 dma_pte_clear_one(domain, start);
660                 start += VTD_PAGE_SIZE;
661         }
662 }
663
664 /* free page table pages. last level pte should already be cleared */
665 static void dma_pte_free_pagetable(struct dmar_domain *domain,
666         u64 start, u64 end)
667 {
668         int addr_width = agaw_to_width(domain->agaw);
669         struct dma_pte *pte;
670         int total = agaw_to_level(domain->agaw);
671         int level;
672         u64 tmp;
673         struct intel_iommu *iommu = domain_get_iommu(domain);
674
675         start &= (((u64)1) << addr_width) - 1;
676         end &= (((u64)1) << addr_width) - 1;
677
678         /* we don't need lock here, nobody else touches the iova range */
679         level = 2;
680         while (level <= total) {
681                 tmp = align_to_level(start, level);
682                 if (tmp >= end || (tmp + level_size(level) > end))
683                         return;
684
685                 while (tmp < end) {
686                         pte = dma_addr_level_pte(domain, tmp, level);
687                         if (pte) {
688                                 free_pgtable_page(
689                                         phys_to_virt(dma_pte_addr(pte)));
690                                 dma_clear_pte(pte);
691                                 __iommu_flush_cache(iommu,
692                                                 pte, sizeof(*pte));
693                         }
694                         tmp += level_size(level);
695                 }
696                 level++;
697         }
698         /* free pgd */
699         if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
700                 free_pgtable_page(domain->pgd);
701                 domain->pgd = NULL;
702         }
703 }
704
705 /* iommu handling */
706 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
707 {
708         struct root_entry *root;
709         unsigned long flags;
710
711         root = (struct root_entry *)alloc_pgtable_page();
712         if (!root)
713                 return -ENOMEM;
714
715         __iommu_flush_cache(iommu, root, ROOT_SIZE);
716
717         spin_lock_irqsave(&iommu->lock, flags);
718         iommu->root_entry = root;
719         spin_unlock_irqrestore(&iommu->lock, flags);
720
721         return 0;
722 }
723
724 static void iommu_set_root_entry(struct intel_iommu *iommu)
725 {
726         void *addr;
727         u32 cmd, sts;
728         unsigned long flag;
729
730         addr = iommu->root_entry;
731
732         spin_lock_irqsave(&iommu->register_lock, flag);
733         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
734
735         cmd = iommu->gcmd | DMA_GCMD_SRTP;
736         writel(cmd, iommu->reg + DMAR_GCMD_REG);
737
738         /* Make sure hardware completes it */
739         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
740                 readl, (sts & DMA_GSTS_RTPS), sts);
741
742         spin_unlock_irqrestore(&iommu->register_lock, flag);
743 }
744
745 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
746 {
747         u32 val;
748         unsigned long flag;
749
750         if (!cap_rwbf(iommu->cap))
751                 return;
752         val = iommu->gcmd | DMA_GCMD_WBF;
753
754         spin_lock_irqsave(&iommu->register_lock, flag);
755         writel(val, iommu->reg + DMAR_GCMD_REG);
756
757         /* Make sure hardware completes it */
758         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
759                         readl, (!(val & DMA_GSTS_WBFS)), val);
760
761         spin_unlock_irqrestore(&iommu->register_lock, flag);
762 }
763
764 /* return value determines whether we need a write buffer flush */
765 static int __iommu_flush_context(struct intel_iommu *iommu,
766         u16 did, u16 source_id, u8 function_mask, u64 type,
767         int non_present_entry_flush)
768 {
769         u64 val = 0;
770         unsigned long flag;
771
772         /*
773          * In the non-present entry flush case, if the hardware doesn't cache
774          * non-present entries we do nothing; if it does cache them, we flush
775          * the entries of domain 0 (that domain id is used to cache any
776          * non-present entries)
777          */
778         if (non_present_entry_flush) {
779                 if (!cap_caching_mode(iommu->cap))
780                         return 1;
781                 else
782                         did = 0;
783         }
784
785         switch (type) {
786         case DMA_CCMD_GLOBAL_INVL:
787                 val = DMA_CCMD_GLOBAL_INVL;
788                 break;
789         case DMA_CCMD_DOMAIN_INVL:
790                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
791                 break;
792         case DMA_CCMD_DEVICE_INVL:
793                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
794                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
795                 break;
796         default:
797                 BUG();
798         }
799         val |= DMA_CCMD_ICC;
800
801         spin_lock_irqsave(&iommu->register_lock, flag);
802         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
803
804         /* Make sure hardware completes it */
805         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
806                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
807
808         spin_unlock_irqrestore(&iommu->register_lock, flag);
809
810         /* flush context entry will implicitly flush write buffer */
811         return 0;
812 }
813
814 /* return value determines whether we need a write buffer flush */
815 static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
816         u64 addr, unsigned int size_order, u64 type,
817         int non_present_entry_flush)
818 {
819         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
820         u64 val = 0, val_iva = 0;
821         unsigned long flag;
822
823         /*
824          * In the non-present entry flush case, if the hardware doesn't cache
825          * non-present entries we do nothing; if it does cache them, we flush
826          * the entries of domain 0 (that domain id is used to cache any
827          * non-present entries)
828          */
829         if (non_present_entry_flush) {
830                 if (!cap_caching_mode(iommu->cap))
831                         return 1;
832                 else
833                         did = 0;
834         }
835
836         switch (type) {
837         case DMA_TLB_GLOBAL_FLUSH:
838                 /* global flush doesn't need to set IVA_REG */
839                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
840                 break;
841         case DMA_TLB_DSI_FLUSH:
842                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
843                 break;
844         case DMA_TLB_PSI_FLUSH:
845                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
846                 /* Note: always flush non-leaf currently */
847                 val_iva = size_order | addr;
848                 break;
849         default:
850                 BUG();
851         }
852         /* Note: set drain read/write */
853 #if 0
854         /*
855          * This is probably only here to be extra safe.  It looks like
856          * we can ignore it without any impact.
857          */
858         if (cap_read_drain(iommu->cap))
859                 val |= DMA_TLB_READ_DRAIN;
860 #endif
861         if (cap_write_drain(iommu->cap))
862                 val |= DMA_TLB_WRITE_DRAIN;
863
864         spin_lock_irqsave(&iommu->register_lock, flag);
865         /* Note: Only uses first TLB reg currently */
866         if (val_iva)
867                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
868         dmar_writeq(iommu->reg + tlb_offset + 8, val);
869
870         /* Make sure hardware completes it */
871         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
872                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
873
874         spin_unlock_irqrestore(&iommu->register_lock, flag);
875
876         /* check IOTLB invalidation granularity */
877         if (DMA_TLB_IAIG(val) == 0)
878                 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
879         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
880                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
881                         (unsigned long long)DMA_TLB_IIRG(type),
882                         (unsigned long long)DMA_TLB_IAIG(val));
883         /* flush iotlb entry will implicitly flush write buffer */
884         return 0;
885 }
886
887 static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
888         u64 addr, unsigned int pages, int non_present_entry_flush)
889 {
890         unsigned int mask;
891
892         BUG_ON(addr & (~VTD_PAGE_MASK));
893         BUG_ON(pages == 0);
894
895         /* Fallback to domain selective flush if no PSI support */
896         if (!cap_pgsel_inv(iommu->cap))
897                 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
898                                                 DMA_TLB_DSI_FLUSH,
899                                                 non_present_entry_flush);
900
901         /*
902          * PSI requires page size to be 2 ^ x, and the base address is naturally
903          * aligned to the size
904          */
905         mask = ilog2(__roundup_pow_of_two(pages));
906         /* Fallback to domain selective flush if size is too big */
907         if (mask > cap_max_amask_val(iommu->cap))
908                 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
909                         DMA_TLB_DSI_FLUSH, non_present_entry_flush);
910
911         return iommu->flush.flush_iotlb(iommu, did, addr, mask,
912                                         DMA_TLB_PSI_FLUSH,
913                                         non_present_entry_flush);
914 }
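/*
 * A worked example: a request to flush 5 pages at address 0x100000
 * yields mask = ilog2(__roundup_pow_of_two(5)) = 3, i.e. a PSI flush of
 * 8 pages (32KB) starting at 0x100000, provided 3 does not exceed
 * cap_max_amask_val(); otherwise the code falls back to a domain
 * selective flush.
 */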
915
916 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
917 {
918         u32 pmen;
919         unsigned long flags;
920
921         spin_lock_irqsave(&iommu->register_lock, flags);
922         pmen = readl(iommu->reg + DMAR_PMEN_REG);
923         pmen &= ~DMA_PMEN_EPM;
924         writel(pmen, iommu->reg + DMAR_PMEN_REG);
925
926         /* wait for the protected region status bit to clear */
927         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
928                 readl, !(pmen & DMA_PMEN_PRS), pmen);
929
930         spin_unlock_irqrestore(&iommu->register_lock, flags);
931 }
932
933 static int iommu_enable_translation(struct intel_iommu *iommu)
934 {
935         u32 sts;
936         unsigned long flags;
937
938         spin_lock_irqsave(&iommu->register_lock, flags);
939         writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
940
941         /* Make sure hardware completes it */
942         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
943                 readl, (sts & DMA_GSTS_TES), sts);
944
945         iommu->gcmd |= DMA_GCMD_TE;
946         spin_unlock_irqrestore(&iommu->register_lock, flags);
947         return 0;
948 }
949
950 static int iommu_disable_translation(struct intel_iommu *iommu)
951 {
952         u32 sts;
953         unsigned long flag;
954
955         spin_lock_irqsave(&iommu->register_lock, flag);
956         iommu->gcmd &= ~DMA_GCMD_TE;
957         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
958
959         /* Make sure hardware completes it */
960         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
961                 readl, (!(sts & DMA_GSTS_TES)), sts);
962
963         spin_unlock_irqrestore(&iommu->register_lock, flag);
964         return 0;
965 }
966
967 /* iommu interrupt handling. Most of it is MSI-like. */
968
969 static const char *fault_reason_strings[] =
970 {
971         "Software",
972         "Present bit in root entry is clear",
973         "Present bit in context entry is clear",
974         "Invalid context entry",
975         "Access beyond MGAW",
976         "PTE Write access is not set",
977         "PTE Read access is not set",
978         "Next page table ptr is invalid",
979         "Root table address invalid",
980         "Context table ptr is invalid",
981         "non-zero reserved fields in RTP",
982         "non-zero reserved fields in CTP",
983         "non-zero reserved fields in PTE",
984 };
985 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(fault_reason_strings) - 1)
986
987 const char *dmar_get_fault_reason(u8 fault_reason)
988 {
989         if (fault_reason > MAX_FAULT_REASON_IDX)
990                 return "Unknown";
991         else
992                 return fault_reason_strings[fault_reason];
993 }
994
995 void dmar_msi_unmask(unsigned int irq)
996 {
997         struct intel_iommu *iommu = get_irq_data(irq);
998         unsigned long flag;
999
1000         /* unmask it */
1001         spin_lock_irqsave(&iommu->register_lock, flag);
1002         writel(0, iommu->reg + DMAR_FECTL_REG);
1003         /* Read a reg to force flush the post write */
1004         readl(iommu->reg + DMAR_FECTL_REG);
1005         spin_unlock_irqrestore(&iommu->register_lock, flag);
1006 }
1007
1008 void dmar_msi_mask(unsigned int irq)
1009 {
1010         unsigned long flag;
1011         struct intel_iommu *iommu = get_irq_data(irq);
1012
1013         /* mask it */
1014         spin_lock_irqsave(&iommu->register_lock, flag);
1015         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1016         /* Read a reg to force flush the post write */
1017         readl(iommu->reg + DMAR_FECTL_REG);
1018         spin_unlock_irqrestore(&iommu->register_lock, flag);
1019 }
1020
1021 void dmar_msi_write(int irq, struct msi_msg *msg)
1022 {
1023         struct intel_iommu *iommu = get_irq_data(irq);
1024         unsigned long flag;
1025
1026         spin_lock_irqsave(&iommu->register_lock, flag);
1027         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1028         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1029         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1030         spin_unlock_irqrestore(&iommu->register_lock, flag);
1031 }
1032
1033 void dmar_msi_read(int irq, struct msi_msg *msg)
1034 {
1035         struct intel_iommu *iommu = get_irq_data(irq);
1036         unsigned long flag;
1037
1038         spin_lock_irqsave(&iommu->register_lock, flag);
1039         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1040         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1041         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1042         spin_unlock_irqrestore(&iommu->register_lock, flag);
1043 }
1044
1045 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
1046                 u8 fault_reason, u16 source_id, unsigned long long addr)
1047 {
1048         const char *reason;
1049
1050         reason = dmar_get_fault_reason(fault_reason);
1051
1052         printk(KERN_ERR
1053                 "DMAR:[%s] Request device [%02x:%02x.%d] "
1054                 "fault addr %llx \n"
1055                 "DMAR:[fault reason %02d] %s\n",
1056                 (type ? "DMA Read" : "DMA Write"),
1057                 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1058                 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1059         return 0;
1060 }
1061
1062 #define PRIMARY_FAULT_REG_LEN (16)
1063 static irqreturn_t iommu_page_fault(int irq, void *dev_id)
1064 {
1065         struct intel_iommu *iommu = dev_id;
1066         int reg, fault_index;
1067         u32 fault_status;
1068         unsigned long flag;
1069
1070         spin_lock_irqsave(&iommu->register_lock, flag);
1071         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1072
1073         /* TBD: ignore advanced fault log currently */
1074         if (!(fault_status & DMA_FSTS_PPF))
1075                 goto clear_overflow;
1076
1077         fault_index = dma_fsts_fault_record_index(fault_status);
1078         reg = cap_fault_reg_offset(iommu->cap);
1079         while (1) {
1080                 u8 fault_reason;
1081                 u16 source_id;
1082                 u64 guest_addr;
1083                 int type;
1084                 u32 data;
1085
1086                 /* highest 32 bits */
1087                 data = readl(iommu->reg + reg +
1088                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1089                 if (!(data & DMA_FRCD_F))
1090                         break;
1091
1092                 fault_reason = dma_frcd_fault_reason(data);
1093                 type = dma_frcd_type(data);
1094
1095                 data = readl(iommu->reg + reg +
1096                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1097                 source_id = dma_frcd_source_id(data);
1098
1099                 guest_addr = dmar_readq(iommu->reg + reg +
1100                                 fault_index * PRIMARY_FAULT_REG_LEN);
1101                 guest_addr = dma_frcd_page_addr(guest_addr);
1102                 /* clear the fault */
1103                 writel(DMA_FRCD_F, iommu->reg + reg +
1104                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1105
1106                 spin_unlock_irqrestore(&iommu->register_lock, flag);
1107
1108                 iommu_page_fault_do_one(iommu, type, fault_reason,
1109                                 source_id, guest_addr);
1110
1111                 fault_index++;
1112                 if (fault_index > cap_num_fault_regs(iommu->cap))
1113                         fault_index = 0;
1114                 spin_lock_irqsave(&iommu->register_lock, flag);
1115         }
1116 clear_overflow:
1117         /* clear primary fault overflow */
1118         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1119         if (fault_status & DMA_FSTS_PFO)
1120                 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
1121
1122         spin_unlock_irqrestore(&iommu->register_lock, flag);
1123         return IRQ_HANDLED;
1124 }
1125
1126 int dmar_set_interrupt(struct intel_iommu *iommu)
1127 {
1128         int irq, ret;
1129
1130         irq = create_irq();
1131         if (!irq) {
1132                 printk(KERN_ERR "IOMMU: no free vectors\n");
1133                 return -EINVAL;
1134         }
1135
1136         set_irq_data(irq, iommu);
1137         iommu->irq = irq;
1138
1139         ret = arch_setup_dmar_msi(irq);
1140         if (ret) {
1141                 set_irq_data(irq, NULL);
1142                 iommu->irq = 0;
1143                 destroy_irq(irq);
1144                 return 0;
1145         }
1146
1147         /* Force any pending faults to be cleared */
1148         iommu_page_fault(irq, iommu);
1149
1150         ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
1151         if (ret)
1152                 printk(KERN_ERR "IOMMU: can't request irq\n");
1153         return ret;
1154 }
1155
1156 static int iommu_init_domains(struct intel_iommu *iommu)
1157 {
1158         unsigned long ndomains;
1159         unsigned long nlongs;
1160
1161         ndomains = cap_ndoms(iommu->cap);
1162         pr_debug("Number of Domains supported <%ld>\n", ndomains);
1163         nlongs = BITS_TO_LONGS(ndomains);
1164
1165         /* TBD: there might be 64K domains,
1166          * consider other allocation for future chip
1167          */
1168         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1169         if (!iommu->domain_ids) {
1170                 printk(KERN_ERR "Allocating domain id array failed\n");
1171                 return -ENOMEM;
1172         }
1173         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1174                         GFP_KERNEL);
1175         if (!iommu->domains) {
1176                 printk(KERN_ERR "Allocating domain array failed\n");
1177                 kfree(iommu->domain_ids);
1178                 return -ENOMEM;
1179         }
1180
1181         spin_lock_init(&iommu->lock);
1182
1183         /*
1184          * if Caching mode is set, then invalid translations are tagged
1185          * with domainid 0. Hence we need to pre-allocate it.
1186          */
1187         if (cap_caching_mode(iommu->cap))
1188                 set_bit(0, iommu->domain_ids);
1189         return 0;
1190 }
1191
1192
1193 static void domain_exit(struct dmar_domain *domain);
1194
1195 void free_dmar_iommu(struct intel_iommu *iommu)
1196 {
1197         struct dmar_domain *domain;
1198         int i;
1199
1200         i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1201         for (; i < cap_ndoms(iommu->cap); ) {
1202                 domain = iommu->domains[i];
1203                 clear_bit(i, iommu->domain_ids);
1204                 domain_exit(domain);
1205                 i = find_next_bit(iommu->domain_ids,
1206                         cap_ndoms(iommu->cap), i+1);
1207         }
1208
1209         if (iommu->gcmd & DMA_GCMD_TE)
1210                 iommu_disable_translation(iommu);
1211
1212         if (iommu->irq) {
1213                 set_irq_data(iommu->irq, NULL);
1214                 /* This will mask the irq */
1215                 free_irq(iommu->irq, iommu);
1216                 destroy_irq(iommu->irq);
1217         }
1218
1219         kfree(iommu->domains);
1220         kfree(iommu->domain_ids);
1221
1222         g_iommus[iommu->seq_id] = NULL;
1223
1224         /* if all iommus are freed, free g_iommus */
1225         for (i = 0; i < g_num_of_iommus; i++) {
1226                 if (g_iommus[i])
1227                         break;
1228         }
1229
1230         if (i == g_num_of_iommus)
1231                 kfree(g_iommus);
1232
1233         /* free context mapping */
1234         free_context_table(iommu);
1235 }
1236
1237 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1238 {
1239         unsigned long num;
1240         unsigned long ndomains;
1241         struct dmar_domain *domain;
1242         unsigned long flags;
1243
1244         domain = alloc_domain_mem();
1245         if (!domain)
1246                 return NULL;
1247
1248         ndomains = cap_ndoms(iommu->cap);
1249
1250         spin_lock_irqsave(&iommu->lock, flags);
1251         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1252         if (num >= ndomains) {
1253                 spin_unlock_irqrestore(&iommu->lock, flags);
1254                 free_domain_mem(domain);
1255                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1256                 return NULL;
1257         }
1258
1259         set_bit(num, iommu->domain_ids);
1260         domain->id = num;
1261         memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1262         set_bit(iommu->seq_id, &domain->iommu_bmp);
1263         domain->flags = 0;
1264         iommu->domains[num] = domain;
1265         spin_unlock_irqrestore(&iommu->lock, flags);
1266
1267         return domain;
1268 }
1269
1270 static void iommu_free_domain(struct dmar_domain *domain)
1271 {
1272         unsigned long flags;
1273         struct intel_iommu *iommu;
1274
1275         iommu = domain_get_iommu(domain);
1276
1277         spin_lock_irqsave(&iommu->lock, flags);
1278         clear_bit(domain->id, iommu->domain_ids);
1279         spin_unlock_irqrestore(&iommu->lock, flags);
1280 }
1281
1282 static struct iova_domain reserved_iova_list;
1283 static struct lock_class_key reserved_alloc_key;
1284 static struct lock_class_key reserved_rbtree_key;
1285
1286 static void dmar_init_reserved_ranges(void)
1287 {
1288         struct pci_dev *pdev = NULL;
1289         struct iova *iova;
1290         int i;
1291         u64 addr, size;
1292
1293         init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1294
1295         lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1296                 &reserved_alloc_key);
1297         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1298                 &reserved_rbtree_key);
1299
1300         /* IOAPIC ranges shouldn't be accessed by DMA */
1301         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1302                 IOVA_PFN(IOAPIC_RANGE_END));
1303         if (!iova)
1304                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1305
1306         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1307         for_each_pci_dev(pdev) {
1308                 struct resource *r;
1309
1310                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1311                         r = &pdev->resource[i];
1312                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1313                                 continue;
1314                         addr = r->start;
1315                         addr &= PAGE_MASK;
1316                         size = r->end - addr;
1317                         size = PAGE_ALIGN(size);
1318                         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1319                                 IOVA_PFN(size + addr) - 1);
1320                         if (!iova)
1321                                 printk(KERN_ERR "Reserve iova failed\n");
1322                 }
1323         }
1324
1325 }
1326
1327 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1328 {
1329         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1330 }
1331
1332 static inline int guestwidth_to_adjustwidth(int gaw)
1333 {
1334         int agaw;
1335         int r = (gaw - 12) % 9;
1336
1337         if (r == 0)
1338                 agaw = gaw;
1339         else
1340                 agaw = gaw + 9 - r;
1341         if (agaw > 64)
1342                 agaw = 64;
1343         return agaw;
1344 }
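/*
 * Worked examples: gaw 39 and 48 are already of the form 12 + 9 * n and
 * come back unchanged, while gaw 32 gives r = (32 - 12) % 9 = 2 and is
 * rounded up to 32 + 9 - 2 = 39, the next width a page-table level can
 * actually cover.
 */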
1345
1346 static int domain_init(struct dmar_domain *domain, int guest_width)
1347 {
1348         struct intel_iommu *iommu;
1349         int adjust_width, agaw;
1350         unsigned long sagaw;
1351
1352         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1353         spin_lock_init(&domain->mapping_lock);
1354
1355         domain_reserve_special_ranges(domain);
1356
1357         /* calculate AGAW */
1358         iommu = domain_get_iommu(domain);
1359         if (guest_width > cap_mgaw(iommu->cap))
1360                 guest_width = cap_mgaw(iommu->cap);
1361         domain->gaw = guest_width;
1362         adjust_width = guestwidth_to_adjustwidth(guest_width);
1363         agaw = width_to_agaw(adjust_width);
1364         sagaw = cap_sagaw(iommu->cap);
1365         if (!test_bit(agaw, &sagaw)) {
1366                 /* hardware doesn't support it, choose a bigger one */
1367                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1368                 agaw = find_next_bit(&sagaw, 5, agaw);
1369                 if (agaw >= 5)
1370                         return -ENODEV;
1371         }
1372         domain->agaw = agaw;
1373         INIT_LIST_HEAD(&domain->devices);
1374
1375         if (ecap_coherent(iommu->ecap))
1376                 domain->iommu_coherency = 1;
1377         else
1378                 domain->iommu_coherency = 0;
1379
1380         /* always allocate the top pgd */
1381         domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1382         if (!domain->pgd)
1383                 return -ENOMEM;
1384         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1385         return 0;
1386 }
1387
1388 static void domain_exit(struct dmar_domain *domain)
1389 {
1390         u64 end;
1391
1392         /* Domain 0 is reserved, so don't process it */
1393         if (!domain)
1394                 return;
1395
1396         domain_remove_dev_info(domain);
1397         /* destroy iovas */
1398         put_iova_domain(&domain->iovad);
1399         end = DOMAIN_MAX_ADDR(domain->gaw);
1400         end = end & (~PAGE_MASK);
1401
1402         /* clear ptes */
1403         dma_pte_clear_range(domain, 0, end);
1404
1405         /* free page tables */
1406         dma_pte_free_pagetable(domain, 0, end);
1407
1408         iommu_free_domain(domain);
1409         free_domain_mem(domain);
1410 }
1411
1412 static int domain_context_mapping_one(struct dmar_domain *domain,
1413                 u8 bus, u8 devfn)
1414 {
1415         struct context_entry *context;
1416         struct intel_iommu *iommu = domain_get_iommu(domain);
1417         unsigned long flags;
1418
1419         pr_debug("Set context mapping for %02x:%02x.%d\n",
1420                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1421         BUG_ON(!domain->pgd);
1422         context = device_to_context_entry(iommu, bus, devfn);
1423         if (!context)
1424                 return -ENOMEM;
1425         spin_lock_irqsave(&iommu->lock, flags);
1426         if (context_present(context)) {
1427                 spin_unlock_irqrestore(&iommu->lock, flags);
1428                 return 0;
1429         }
1430
1431         context_set_domain_id(context, domain->id);
1432         context_set_address_width(context, domain->agaw);
1433         context_set_address_root(context, virt_to_phys(domain->pgd));
1434         context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
1435         context_set_fault_enable(context);
1436         context_set_present(context);
1437         __iommu_flush_cache(iommu, context, sizeof(*context));
1438
1439         /* it's a non-present to present mapping */
1440         if (iommu->flush.flush_context(iommu, domain->id,
1441                 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1442                 DMA_CCMD_DEVICE_INVL, 1))
1443                 iommu_flush_write_buffer(iommu);
1444         else
1445                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1446
1447         spin_unlock_irqrestore(&iommu->lock, flags);
1448         return 0;
1449 }
1450
1451 static int
1452 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1453 {
1454         int ret;
1455         struct pci_dev *tmp, *parent;
1456
1457         ret = domain_context_mapping_one(domain, pdev->bus->number,
1458                 pdev->devfn);
1459         if (ret)
1460                 return ret;
1461
1462         /* dependent device mapping */
1463         tmp = pci_find_upstream_pcie_bridge(pdev);
1464         if (!tmp)
1465                 return 0;
1466         /* Secondary interface's bus number and devfn 0 */
1467         parent = pdev->bus->self;
1468         while (parent != tmp) {
1469                 ret = domain_context_mapping_one(domain, parent->bus->number,
1470                         parent->devfn);
1471                 if (ret)
1472                         return ret;
1473                 parent = parent->bus->self;
1474         }
1475         if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1476                 return domain_context_mapping_one(domain,
1477                         tmp->subordinate->number, 0);
1478         else /* this is a legacy PCI bridge */
1479                 return domain_context_mapping_one(domain,
1480                         tmp->bus->number, tmp->devfn);
1481 }
1482
1483 static int domain_context_mapped(struct dmar_domain *domain,
1484         struct pci_dev *pdev)
1485 {
1486         int ret;
1487         struct pci_dev *tmp, *parent;
1488         struct intel_iommu *iommu = domain_get_iommu(domain);
1489
1490         ret = device_context_mapped(iommu,
1491                 pdev->bus->number, pdev->devfn);
1492         if (!ret)
1493                 return ret;
1494         /* dependent device mapping */
1495         tmp = pci_find_upstream_pcie_bridge(pdev);
1496         if (!tmp)
1497                 return ret;
1498         /* Secondary interface's bus number and devfn 0 */
1499         parent = pdev->bus->self;
1500         while (parent != tmp) {
1501                 ret = device_context_mapped(iommu, parent->bus->number,
1502                         parent->devfn);
1503                 if (!ret)
1504                         return ret;
1505                 parent = parent->bus->self;
1506         }
1507         if (tmp->is_pcie)
1508                 return device_context_mapped(iommu,
1509                         tmp->subordinate->number, 0);
1510         else
1511                 return device_context_mapped(iommu,
1512                         tmp->bus->number, tmp->devfn);
1513 }
1514
1515 static int
1516 domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1517                         u64 hpa, size_t size, int prot)
1518 {
1519         u64 start_pfn, end_pfn;
1520         struct dma_pte *pte;
1521         int index;
1522         int addr_width = agaw_to_width(domain->agaw);
1523         struct intel_iommu *iommu = domain_get_iommu(domain);
1524
1525         hpa &= (((u64)1) << addr_width) - 1;
1526
1527         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1528                 return -EINVAL;
1529         iova &= PAGE_MASK;
1530         start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1531         end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
1532         index = 0;
1533         while (start_pfn < end_pfn) {
1534                 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
1535                 if (!pte)
1536                         return -ENOMEM;
1537                 /* We don't need lock here, nobody else
1538                  * touches the iova range
1539                  */
1540                 BUG_ON(dma_pte_addr(pte));
1541                 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
1542                 dma_set_pte_prot(pte, prot);
1543                 __iommu_flush_cache(iommu, pte, sizeof(*pte));
1544                 start_pfn++;
1545                 index++;
1546         }
1547         return 0;
1548 }
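/*
 * A worked example (addresses made up for illustration): mapping
 * hpa 0x80000000, size 8192 at iova 0x2000 with DMA_PTE_READ |
 * DMA_PTE_WRITE creates two last-level ptes, so that iova pages 0x2000
 * and 0x3000 translate to host pages 0x80000000 and 0x80001000
 * read/write.
 */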
1549
1550 static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1551 {
1552         struct intel_iommu *iommu = domain_get_iommu(domain);
1553
1554         clear_context_table(iommu, bus, devfn);
1555         iommu->flush.flush_context(iommu, 0, 0, 0,
1556                                            DMA_CCMD_GLOBAL_INVL, 0);
1557         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1558                                          DMA_TLB_GLOBAL_FLUSH, 0);
1559 }
1560
1561 static void domain_remove_dev_info(struct dmar_domain *domain)
1562 {
1563         struct device_domain_info *info;
1564         unsigned long flags;
1565
1566         spin_lock_irqsave(&device_domain_lock, flags);
1567         while (!list_empty(&domain->devices)) {
1568                 info = list_entry(domain->devices.next,
1569                         struct device_domain_info, link);
1570                 list_del(&info->link);
1571                 list_del(&info->global);
1572                 if (info->dev)
1573                         info->dev->dev.archdata.iommu = NULL;
1574                 spin_unlock_irqrestore(&device_domain_lock, flags);
1575
1576                 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1577                 free_devinfo_mem(info);
1578
1579                 spin_lock_irqsave(&device_domain_lock, flags);
1580         }
1581         spin_unlock_irqrestore(&device_domain_lock, flags);
1582 }
1583
1584 /*
1585  * find_domain
1586  * Note: we use struct pci_dev->dev.archdata.iommu to store the info
1587  */
1588 static struct dmar_domain *
1589 find_domain(struct pci_dev *pdev)
1590 {
1591         struct device_domain_info *info;
1592
1593         /* No lock; we assume no domain is torn down in the normal case */
1594         info = pdev->dev.archdata.iommu;
1595         if (info)
1596                 return info->domain;
1597         return NULL;
1598 }
1599
1600 /* Return an initialized domain for the device, allocating one if necessary */
1601 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1602 {
1603         struct dmar_domain *domain, *found = NULL;
1604         struct intel_iommu *iommu;
1605         struct dmar_drhd_unit *drhd;
1606         struct device_domain_info *info, *tmp;
1607         struct pci_dev *dev_tmp;
1608         unsigned long flags;
1609         int bus = 0, devfn = 0;
1610
1611         domain = find_domain(pdev);
1612         if (domain)
1613                 return domain;
1614
1615         dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1616         if (dev_tmp) {
1617                 if (dev_tmp->is_pcie) {
1618                         bus = dev_tmp->subordinate->number;
1619                         devfn = 0;
1620                 } else {
1621                         bus = dev_tmp->bus->number;
1622                         devfn = dev_tmp->devfn;
1623                 }
1624                 spin_lock_irqsave(&device_domain_lock, flags);
1625                 list_for_each_entry(info, &device_domain_list, global) {
1626                         if (info->bus == bus && info->devfn == devfn) {
1627                                 found = info->domain;
1628                                 break;
1629                         }
1630                 }
1631                 spin_unlock_irqrestore(&device_domain_lock, flags);
1632                 /* pcie-pci bridge already has a domain, use it */
1633                 if (found) {
1634                         domain = found;
1635                         goto found_domain;
1636                 }
1637         }
1638
1639         /* Allocate new domain for the device */
1640         drhd = dmar_find_matched_drhd_unit(pdev);
1641         if (!drhd) {
1642                 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1643                         pci_name(pdev));
1644                 return NULL;
1645         }
1646         iommu = drhd->iommu;
1647
1648         domain = iommu_alloc_domain(iommu);
1649         if (!domain)
1650                 goto error;
1651
1652         if (domain_init(domain, gaw)) {
1653                 domain_exit(domain);
1654                 goto error;
1655         }
1656
1657         /* register pcie-to-pci device */
1658         if (dev_tmp) {
1659                 info = alloc_devinfo_mem();
1660                 if (!info) {
1661                         domain_exit(domain);
1662                         goto error;
1663                 }
1664                 info->bus = bus;
1665                 info->devfn = devfn;
1666                 info->dev = NULL;
1667                 info->domain = domain;
1668                 /* This domain is shared by devices under p2p bridge */
1669                 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
1670
1671                 /* pcie-to-pci bridge already has a domain, use it */
1672                 found = NULL;
1673                 spin_lock_irqsave(&device_domain_lock, flags);
1674                 list_for_each_entry(tmp, &device_domain_list, global) {
1675                         if (tmp->bus == bus && tmp->devfn == devfn) {
1676                                 found = tmp->domain;
1677                                 break;
1678                         }
1679                 }
1680                 if (found) {
1681                         free_devinfo_mem(info);
1682                         domain_exit(domain);
1683                         domain = found;
1684                 } else {
1685                         list_add(&info->link, &domain->devices);
1686                         list_add(&info->global, &device_domain_list);
1687                 }
1688                 spin_unlock_irqrestore(&device_domain_lock, flags);
1689         }
1690
1691 found_domain:
1692         info = alloc_devinfo_mem();
1693         if (!info)
1694                 goto error;
1695         info->bus = pdev->bus->number;
1696         info->devfn = pdev->devfn;
1697         info->dev = pdev;
1698         info->domain = domain;
1699         spin_lock_irqsave(&device_domain_lock, flags);
1700         /* somebody else raced with us and already set up the domain */
1701         found = find_domain(pdev);
1702         if (found != NULL) {
1703                 spin_unlock_irqrestore(&device_domain_lock, flags);
1704                 if (found != domain) {
1705                         domain_exit(domain);
1706                         domain = found;
1707                 }
1708                 free_devinfo_mem(info);
1709                 return domain;
1710         }
1711         list_add(&info->link, &domain->devices);
1712         list_add(&info->global, &device_domain_list);
1713         pdev->dev.archdata.iommu = info;
1714         spin_unlock_irqrestore(&device_domain_lock, flags);
1715         return domain;
1716 error:
1717         /* recheck it here, maybe somebody else already set it */
1718         return find_domain(pdev);
1719 }
1720
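/*
 * iommu_prepare_identity_map - establish a 1:1 (identity) mapping of
 * [start, end) in the device's domain: reserve the iova range, clear any
 * stale PTEs, map the pages read/write and install the context entry.
 * Used for RMRR regions and the graphics/ISA workarounds below.
 */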
1721 static int iommu_prepare_identity_map(struct pci_dev *pdev,
1722                                       unsigned long long start,
1723                                       unsigned long long end)
1724 {
1725         struct dmar_domain *domain;
1726         unsigned long size;
1727         unsigned long long base;
1728         int ret;
1729
1730         printk(KERN_INFO
1731                 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1732                 pci_name(pdev), start, end);
1733         /* page table init */
1734         domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1735         if (!domain)
1736                 return -ENOMEM;
1737
1738         /* The address might not be aligned */
1739         base = start & PAGE_MASK;
1740         size = end - base;
1741         size = PAGE_ALIGN(size);
1742         if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1743                         IOVA_PFN(base + size) - 1)) {
1744                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1745                 ret = -ENOMEM;
1746                 goto error;
1747         }
1748
1749         pr_debug("Mapping reserved region %lx@%llx for %s\n",
1750                 size, base, pci_name(pdev));
1751         /*
1752          * RMRR range might have overlap with physical memory range,
1753          * clear it first
1754          */
1755         dma_pte_clear_range(domain, base, base + size);
1756
1757         ret = domain_page_mapping(domain, base, base, size,
1758                 DMA_PTE_READ|DMA_PTE_WRITE);
1759         if (ret)
1760                 goto error;
1761
1762         /* context entry init */
1763         ret = domain_context_mapping(domain, pdev);
1764         if (!ret)
1765                 return 0;
1766 error:
1767         domain_exit(domain);
1768         return ret;
1769
1770 }
1771
1772 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1773         struct pci_dev *pdev)
1774 {
1775         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1776                 return 0;
1777         return iommu_prepare_identity_map(pdev, rmrr->base_address,
1778                 rmrr->end_address + 1);
1779 }
1780
1781 #ifdef CONFIG_DMAR_GFX_WA
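/*
 * Graphics workaround: identity-map every online node's active memory
 * regions for each graphics device, so that graphics DMA (which may not go
 * through the DMA API) keeps working once translation is enabled.
 */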
1782 struct iommu_prepare_data {
1783         struct pci_dev *pdev;
1784         int ret;
1785 };
1786
1787 static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1788                                          unsigned long end_pfn, void *datax)
1789 {
1790         struct iommu_prepare_data *data;
1791
1792         data = (struct iommu_prepare_data *)datax;
1793
1794         data->ret = iommu_prepare_identity_map(data->pdev,
1795                                 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1796         return data->ret;
1797
1798 }
1799
1800 static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1801 {
1802         int nid;
1803         struct iommu_prepare_data data;
1804
1805         data.pdev = pdev;
1806         data.ret = 0;
1807
1808         for_each_online_node(nid) {
1809                 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1810                 if (data.ret)
1811                         return data.ret;
1812         }
1813         return data.ret;
1814 }
1815
1816 static void __init iommu_prepare_gfx_mapping(void)
1817 {
1818         struct pci_dev *pdev = NULL;
1819         int ret;
1820
1821         for_each_pci_dev(pdev) {
1822                 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1823                                 !IS_GFX_DEVICE(pdev))
1824                         continue;
1825                 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1826                         pci_name(pdev));
1827                 ret = iommu_prepare_with_active_regions(pdev);
1828                 if (ret)
1829                         printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1830         }
1831 }
1832 #else /* !CONFIG_DMAR_GFX_WA */
1833 static inline void iommu_prepare_gfx_mapping(void)
1834 {
1835         return;
1836 }
1837 #endif
1838
1839 #ifdef CONFIG_DMAR_FLOPPY_WA
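/*
 * Floppy workaround: identity-map the first 16MB for the ISA/LPC bridge so
 * that legacy ISA DMA (e.g. the floppy controller) keeps working once
 * translation is enabled.
 */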
1840 static inline void iommu_prepare_isa(void)
1841 {
1842         struct pci_dev *pdev;
1843         int ret;
1844
1845         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1846         if (!pdev)
1847                 return;
1848
1849         printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1850         ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1851
1852         if (ret)
1853                 printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
1854                         "floppy might not work\n");
1855
1856 }
1857 #else
1858 static inline void iommu_prepare_isa(void)
1859 {
1860         return;
1861 }
1862 #endif /* !CONFIG_DMAR_FLOPPY_WA */
1863
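/*
 * init_dmars - one-time bring-up of all DMAR units: allocate the global
 * iommu and deferred-flush arrays, set up per-IOMMU domain ids and root
 * entries, pick queued vs. register-based invalidation, install the
 * RMRR/graphics/ISA identity maps, then enable fault reporting and
 * translation on every unit that is not ignored.
 */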
1864 static int __init init_dmars(void)
1865 {
1866         struct dmar_drhd_unit *drhd;
1867         struct dmar_rmrr_unit *rmrr;
1868         struct pci_dev *pdev;
1869         struct intel_iommu *iommu;
1870         int i, ret, unit = 0;
1871
1872         /*
1873          * for each drhd
1874          *    allocate root
1875          *    initialize and program root entry to not present
1876          * endfor
1877          */
1878         for_each_drhd_unit(drhd) {
1879                 g_num_of_iommus++;
1880                 /*
1881                  * lock not needed as this is only incremented in the single
1882                  * threaded kernel __init code path; all other accesses are
1883                  * read only
1884                  */
1885         }
1886
1887         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
1888                         GFP_KERNEL);
1889         if (!g_iommus) {
1890                 printk(KERN_ERR "Allocating global iommu array failed\n");
1891                 ret = -ENOMEM;
1892                 goto error;
1893         }
1894
1895         deferred_flush = kzalloc(g_num_of_iommus *
1896                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1897         if (!deferred_flush) {
1898                 kfree(g_iommus);
1899                 ret = -ENOMEM;
1900                 goto error;
1901         }
1902
1903         for_each_drhd_unit(drhd) {
1904                 if (drhd->ignored)
1905                         continue;
1906
1907                 iommu = drhd->iommu;
1908                 g_iommus[iommu->seq_id] = iommu;
1909
1910                 ret = iommu_init_domains(iommu);
1911                 if (ret)
1912                         goto error;
1913
1914                 /*
1915                  * TBD:
1916                  * we could share the same root & context tables
1917                  * among all IOMMUs. Need to split it later.
1918                  */
1919                 ret = iommu_alloc_root_entry(iommu);
1920                 if (ret) {
1921                         printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1922                         goto error;
1923                 }
1924         }
1925
1926         for_each_drhd_unit(drhd) {
1927                 if (drhd->ignored)
1928                         continue;
1929
1930                 iommu = drhd->iommu;
1931                 if (dmar_enable_qi(iommu)) {
1932                         /*
1933                          * Queued Invalidate not enabled, use Register Based
1934                          * Invalidate
1935                          */
1936                         iommu->flush.flush_context = __iommu_flush_context;
1937                         iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1938                         printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
1939                                "invalidation\n",
1940                                (unsigned long long)drhd->reg_base_addr);
1941                 } else {
1942                         iommu->flush.flush_context = qi_flush_context;
1943                         iommu->flush.flush_iotlb = qi_flush_iotlb;
1944                         printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
1945                                "invalidation\n",
1946                                (unsigned long long)drhd->reg_base_addr);
1947                 }
1948         }
1949
1950         /*
1951          * For each rmrr
1952          *   for each dev attached to rmrr
1953          *   do
1954          *     locate drhd for dev, alloc domain for dev
1955          *     allocate free domain
1956          *     allocate page table entries for rmrr
1957          *     if context not allocated for bus
1958          *           allocate and init context
1959          *           set present in root table for this bus
1960          *     init context with domain, translation etc
1961          *    endfor
1962          * endfor
1963          */
1964         for_each_rmrr_units(rmrr) {
1965                 for (i = 0; i < rmrr->devices_cnt; i++) {
1966                         pdev = rmrr->devices[i];
1967                         /* some BIOSes list nonexistent devices in the DMAR table */
1968                         if (!pdev)
1969                                 continue;
1970                         ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1971                         if (ret)
1972                                 printk(KERN_ERR
1973                                  "IOMMU: mapping reserved region failed\n");
1974                 }
1975         }
1976
1977         iommu_prepare_gfx_mapping();
1978
1979         iommu_prepare_isa();
1980
1981         /*
1982          * for each drhd
1983          *   enable fault log
1984          *   global invalidate context cache
1985          *   global invalidate iotlb
1986          *   enable translation
1987          */
1988         for_each_drhd_unit(drhd) {
1989                 if (drhd->ignored)
1990                         continue;
1991                 iommu = drhd->iommu;
1992                 sprintf(iommu->name, "dmar%d", unit++);
1993
1994                 iommu_flush_write_buffer(iommu);
1995
1996                 ret = dmar_set_interrupt(iommu);
1997                 if (ret)
1998                         goto error;
1999
2000                 iommu_set_root_entry(iommu);
2001
2002                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
2003                                            0);
2004                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
2005                                          0);
2006                 iommu_disable_protect_mem_regions(iommu);
2007
2008                 ret = iommu_enable_translation(iommu);
2009                 if (ret)
2010                         goto error;
2011         }
2012
2013         return 0;
2014 error:
2015         for_each_drhd_unit(drhd) {
2016                 if (drhd->ignored)
2017                         continue;
2018                 iommu = drhd->iommu;
2019                 free_iommu(iommu);
2020         }
2021         kfree(g_iommus);
2022         return ret;
2023 }
2024
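/*
 * aligned_size - number of bytes, rounded up to whole pages, needed to
 * cover @size bytes starting at the (possibly unaligned) @host_addr.
 */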
2025 static inline u64 aligned_size(u64 host_addr, size_t size)
2026 {
2027         u64 addr;
2028         addr = (host_addr & (~PAGE_MASK)) + size;
2029         return PAGE_ALIGN(addr);
2030 }
2031
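/*
 * iommu_alloc_iova - allocate a @size byte IO virtual address range from
 * @domain's iova allocator at or below @end (clamped to the domain's
 * maximum guest address).  Returns NULL on failure.
 */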
2032 struct iova *
2033 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
2034 {
2035         struct iova *piova;
2036
2037         /* Make sure it's in range */
2038         end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
2039         if (!size || (IOVA_START_ADDR + size > end))
2040                 return NULL;
2041
2042         piova = alloc_iova(&domain->iovad,
2043                         size >> PAGE_SHIFT, IOVA_PFN(end), 1);
2044         return piova;
2045 }
2046
2047 static struct iova *
2048 __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
2049                    size_t size, u64 dma_mask)
2050 {
2051         struct pci_dev *pdev = to_pci_dev(dev);
2052         struct iova *iova = NULL;
2053
2054         if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
2055                 iova = iommu_alloc_iova(domain, size, dma_mask);
2056         else {
2057                 /*
2058                  * First try to allocate an io virtual address below
2059                  * DMA_32BIT_MASK and, if that fails, try allocating
2060                  * from the higher range
2061                  */
2062                 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
2063                 if (!iova)
2064                         iova = iommu_alloc_iova(domain, size, dma_mask);
2065         }
2066
2067         if (!iova) {
2068                 printk(KERN_ERR "Allocating iova for %s failed\n", pci_name(pdev));
2069                 return NULL;
2070         }
2071
2072         return iova;
2073 }
2074
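/*
 * get_valid_domain_for_dev - return the device's domain, allocating it and
 * (re)establishing the context-table mapping if necessary.  Called on the
 * DMA mapping fast paths.
 */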
2075 static struct dmar_domain *
2076 get_valid_domain_for_dev(struct pci_dev *pdev)
2077 {
2078         struct dmar_domain *domain;
2079         int ret;
2080
2081         domain = get_domain_for_dev(pdev,
2082                         DEFAULT_DOMAIN_ADDRESS_WIDTH);
2083         if (!domain) {
2084                 printk(KERN_ERR
2085                         "Allocating domain for %s failed\n", pci_name(pdev));
2086                 return NULL;
2087         }
2088
2089         /* make sure context mapping is ok */
2090         if (unlikely(!domain_context_mapped(domain, pdev))) {
2091                 ret = domain_context_mapping(domain, pdev);
2092                 if (ret) {
2093                         printk(KERN_ERR
2094                                 "Domain context map for %s failed\n",
2095                                 pci_name(pdev));
2096                         return NULL;
2097                 }
2098         }
2099
2100         return domain;
2101 }
2102
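/*
 * __intel_map_single - core of the map_single DMA op: get the device's
 * domain, allocate an iova below @dma_mask, map it to @paddr with
 * permissions derived from @dir, and flush the IOTLB for the non-present
 * to present transition.  Returns the bus (DMA) address, or 0 on failure;
 * devices marked DUMMY_DEVICE_DOMAIN_INFO bypass translation and simply
 * get @paddr back.
 */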
2103 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2104                                      size_t size, int dir, u64 dma_mask)
2105 {
2106         struct pci_dev *pdev = to_pci_dev(hwdev);
2107         struct dmar_domain *domain;
2108         phys_addr_t start_paddr;
2109         struct iova *iova;
2110         int prot = 0;
2111         int ret;
2112         struct intel_iommu *iommu;
2113
2114         BUG_ON(dir == DMA_NONE);
2115         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2116                 return paddr;
2117
2118         domain = get_valid_domain_for_dev(pdev);
2119         if (!domain)
2120                 return 0;
2121
2122         iommu = domain_get_iommu(domain);
2123         size = aligned_size((u64)paddr, size);
2124
2125         iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2126         if (!iova)
2127                 goto error;
2128
2129         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2130
2131         /*
2132          * Check if DMAR supports zero-length reads on write only
2133          * mappings.
2134          */
2135         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2136                         !cap_zlr(iommu->cap))
2137                 prot |= DMA_PTE_READ;
2138         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2139                 prot |= DMA_PTE_WRITE;
2140         /*
2141          * paddr ~ paddr + size might span only part of a page, so map the
2142          * whole page.  Note: if two parts of one page are mapped separately,
2143          * we might have two guest_addrs mapping to the same host paddr; this
2144          * is not a big problem
2145          */
2146         ret = domain_page_mapping(domain, start_paddr,
2147                 ((u64)paddr) & PAGE_MASK, size, prot);
2148         if (ret)
2149                 goto error;
2150
2151         /* it's a non-present to present mapping */
2152         ret = iommu_flush_iotlb_psi(iommu, domain->id,
2153                         start_paddr, size >> VTD_PAGE_SHIFT, 1);
2154         if (ret)
2155                 iommu_flush_write_buffer(iommu);
2156
2157         return start_paddr + ((u64)paddr & (~PAGE_MASK));
2158
2159 error:
2160         if (iova)
2161                 __free_iova(&domain->iovad, iova);
2162         printk(KERN_ERR "Device %s request: %lx@%llx dir %d --- failed\n",
2163                 pci_name(pdev), size, (unsigned long long)paddr, dir);
2164         return 0;
2165 }
2166
2167 dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
2168                             size_t size, int dir)
2169 {
2170         return __intel_map_single(hwdev, paddr, size, dir,
2171                                   to_pci_dev(hwdev)->dma_mask);
2172 }
2173
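/*
 * Deferred unmapping: rather than flushing the IOTLB on every unmap, freed
 * iovas are queued per IOMMU in deferred_flush[] and released here after a
 * single global IOTLB flush, triggered either by unmap_timer or by hitting
 * HIGH_WATER_MARK.  Caller must hold async_umap_flush_lock.
 */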
2174 static void flush_unmaps(void)
2175 {
2176         int i, j;
2177
2178         timer_on = 0;
2179
2180         /* just flush them all */
2181         for (i = 0; i < g_num_of_iommus; i++) {
2182                 struct intel_iommu *iommu = g_iommus[i];
2183                 if (!iommu)
2184                         continue;
2185
2186                 if (deferred_flush[i].next) {
2187                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2188                                                  DMA_TLB_GLOBAL_FLUSH, 0);
2189                         for (j = 0; j < deferred_flush[i].next; j++) {
2190                                 __free_iova(&deferred_flush[i].domain[j]->iovad,
2191                                                 deferred_flush[i].iova[j]);
2192                         }
2193                         deferred_flush[i].next = 0;
2194                 }
2195         }
2196
2197         list_size = 0;
2198 }
2199
2200 static void flush_unmaps_timeout(unsigned long data)
2201 {
2202         unsigned long flags;
2203
2204         spin_lock_irqsave(&async_umap_flush_lock, flags);
2205         flush_unmaps();
2206         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2207 }
2208
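/*
 * add_unmap - queue @iova of domain @dom for deferred freeing on the
 * domain's IOMMU, draining the queue first if it has reached
 * HIGH_WATER_MARK and arming the 10ms unmap_timer if it is not already
 * pending.
 */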
2209 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2210 {
2211         unsigned long flags;
2212         int next, iommu_id;
2213         struct intel_iommu *iommu;
2214
2215         spin_lock_irqsave(&async_umap_flush_lock, flags);
2216         if (list_size == HIGH_WATER_MARK)
2217                 flush_unmaps();
2218
2219         iommu = domain_get_iommu(dom);
2220         iommu_id = iommu->seq_id;
2221
2222         next = deferred_flush[iommu_id].next;
2223         deferred_flush[iommu_id].domain[next] = dom;
2224         deferred_flush[iommu_id].iova[next] = iova;
2225         deferred_flush[iommu_id].next++;
2226
2227         if (!timer_on) {
2228                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2229                 timer_on = 1;
2230         }
2231         list_size++;
2232         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2233 }
2234
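/*
 * intel_unmap_single - unmap_single DMA op: clear the PTEs and free the
 * page tables covering the mapping, then either flush the IOTLB and free
 * the iova immediately (intel_iommu_strict) or defer that via add_unmap().
 */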
2235 void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2236                         int dir)
2237 {
2238         struct pci_dev *pdev = to_pci_dev(dev);
2239         struct dmar_domain *domain;
2240         unsigned long start_addr;
2241         struct iova *iova;
2242         struct intel_iommu *iommu;
2243
2244         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2245                 return;
2246         domain = find_domain(pdev);
2247         BUG_ON(!domain);
2248
2249         iommu = domain_get_iommu(domain);
2250
2251         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2252         if (!iova)
2253                 return;
2254
2255         start_addr = iova->pfn_lo << PAGE_SHIFT;
2256         size = aligned_size((u64)dev_addr, size);
2257
2258         pr_debug("Device %s unmapping: %lx@%llx\n",
2259                 pci_name(pdev), size, (unsigned long long)start_addr);
2260
2261         /*  clear the whole page */
2262         dma_pte_clear_range(domain, start_addr, start_addr + size);
2263         /* free page tables */
2264         dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2265         if (intel_iommu_strict) {
2266                 if (iommu_flush_iotlb_psi(iommu,
2267                         domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
2268                         iommu_flush_write_buffer(iommu);
2269                 /* free iova */
2270                 __free_iova(&domain->iovad, iova);
2271         } else {
2272                 add_unmap(domain, iova);
2273                 /*
2274                  * queue up the release of the unmap; this saves about 1/6th of
2275                  * the cpu time otherwise spent on the iotlb flush operation...
2276                  */
2277         }
2278 }
2279
2280 void *intel_alloc_coherent(struct device *hwdev, size_t size,
2281                            dma_addr_t *dma_handle, gfp_t flags)
2282 {
2283         void *vaddr;
2284         int order;
2285
2286         size = PAGE_ALIGN(size);
2287         order = get_order(size);
2288         flags &= ~(GFP_DMA | GFP_DMA32);
2289
2290         vaddr = (void *)__get_free_pages(flags, order);
2291         if (!vaddr)
2292                 return NULL;
2293         memset(vaddr, 0, size);
2294
2295         *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2296                                          DMA_BIDIRECTIONAL,
2297                                          hwdev->coherent_dma_mask);
2298         if (*dma_handle)
2299                 return vaddr;
2300         free_pages((unsigned long)vaddr, order);
2301         return NULL;
2302 }
2303
2304 void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2305                          dma_addr_t dma_handle)
2306 {
2307         int order;
2308
2309         size = PAGE_ALIGN(size);
2310         order = get_order(size);
2311
2312         intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2313         free_pages((unsigned long)vaddr, order);
2314 }
2315
2316 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
2317
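/*
 * intel_unmap_sg - unmap_sg DMA op: the scatterlist was mapped into one
 * contiguous iova range, so recompute its total size, clear the PTEs, free
 * the page tables, flush the IOTLB and release the iova.
 */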
2318 void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2319                     int nelems, int dir)
2320 {
2321         int i;
2322         struct pci_dev *pdev = to_pci_dev(hwdev);
2323         struct dmar_domain *domain;
2324         unsigned long start_addr;
2325         struct iova *iova;
2326         size_t size = 0;
2327         void *addr;
2328         struct scatterlist *sg;
2329         struct intel_iommu *iommu;
2330
2331         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2332                 return;
2333
2334         domain = find_domain(pdev);
2335         BUG_ON(!domain);
2336
2337         iommu = domain_get_iommu(domain);
2338
2339         iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2340         if (!iova)
2341                 return;
2342         for_each_sg(sglist, sg, nelems, i) {
2343                 addr = SG_ENT_VIRT_ADDRESS(sg);
2344                 size += aligned_size((u64)addr, sg->length);
2345         }
2346
2347         start_addr = iova->pfn_lo << PAGE_SHIFT;
2348
2349         /*  clear the whole page */
2350         dma_pte_clear_range(domain, start_addr, start_addr + size);
2351         /* free page tables */
2352         dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2353
2354         if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
2355                         size >> VTD_PAGE_SHIFT, 0))
2356                 iommu_flush_write_buffer(iommu);
2357
2358         /* free iova */
2359         __free_iova(&domain->iovad, iova);
2360 }
2361
2362 static int intel_nontranslate_map_sg(struct device *hwdev,
2363         struct scatterlist *sglist, int nelems, int dir)
2364 {
2365         int i;
2366         struct scatterlist *sg;
2367
2368         for_each_sg(sglist, sg, nelems, i) {
2369                 BUG_ON(!sg_page(sg));
2370                 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2371                 sg->dma_length = sg->length;
2372         }
2373         return nelems;
2374 }
2375
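/*
 * intel_map_sg - map_sg DMA op: allocate a single iova range big enough
 * for the whole scatterlist and map each element into it back to back, so
 * the device sees one contiguous DMA region.  Devices that bypass the
 * IOMMU fall back to intel_nontranslate_map_sg().
 */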
2376 int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2377                  int dir)
2378 {
2379         void *addr;
2380         int i;
2381         struct pci_dev *pdev = to_pci_dev(hwdev);
2382         struct dmar_domain *domain;
2383         size_t size = 0;
2384         int prot = 0;
2385         size_t offset = 0;
2386         struct iova *iova = NULL;
2387         int ret;
2388         struct scatterlist *sg;
2389         unsigned long start_addr;
2390         struct intel_iommu *iommu;
2391
2392         BUG_ON(dir == DMA_NONE);
2393         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2394                 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2395
2396         domain = get_valid_domain_for_dev(pdev);
2397         if (!domain)
2398                 return 0;
2399
2400         iommu = domain_get_iommu(domain);
2401
2402         for_each_sg(sglist, sg, nelems, i) {
2403                 addr = SG_ENT_VIRT_ADDRESS(sg);
2404                 addr = (void *)virt_to_phys(addr);
2405                 size += aligned_size((u64)addr, sg->length);
2406         }
2407
2408         iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2409         if (!iova) {
2410                 sglist->dma_length = 0;
2411                 return 0;
2412         }
2413
2414         /*
2415          * Check if DMAR supports zero-length reads on write only
2416          * mappings.
2417          */
2418         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2419                         !cap_zlr(iommu->cap))
2420                 prot |= DMA_PTE_READ;
2421         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2422                 prot |= DMA_PTE_WRITE;
2423
2424         start_addr = iova->pfn_lo << PAGE_SHIFT;
2425         offset = 0;
2426         for_each_sg(sglist, sg, nelems, i) {
2427                 addr = SG_ENT_VIRT_ADDRESS(sg);
2428                 addr = (void *)virt_to_phys(addr);
2429                 size = aligned_size((u64)addr, sg->length);
2430                 ret = domain_page_mapping(domain, start_addr + offset,
2431                         ((u64)addr) & PAGE_MASK,
2432                         size, prot);
2433                 if (ret) {
2434                         /*  clear the page */
2435                         dma_pte_clear_range(domain, start_addr,
2436                                   start_addr + offset);
2437                         /* free page tables */
2438                         dma_pte_free_pagetable(domain, start_addr,
2439                                   start_addr + offset);
2440                         /* free iova */
2441                         __free_iova(&domain->iovad, iova);
2442                         return 0;
2443                 }
2444                 sg->dma_address = start_addr + offset +
2445                                 ((u64)addr & (~PAGE_MASK));
2446                 sg->dma_length = sg->length;
2447                 offset += size;
2448         }
2449
2450         /* it's a non-present to present mapping */
2451         if (iommu_flush_iotlb_psi(iommu, domain->id,
2452                         start_addr, offset >> VTD_PAGE_SHIFT, 1))
2453                 iommu_flush_write_buffer(iommu);
2454         return nelems;
2455 }
2456
2457 static struct dma_mapping_ops intel_dma_ops = {
2458         .alloc_coherent = intel_alloc_coherent,
2459         .free_coherent = intel_free_coherent,
2460         .map_single = intel_map_single,
2461         .unmap_single = intel_unmap_single,
2462         .map_sg = intel_map_sg,
2463         .unmap_sg = intel_unmap_sg,
2464 };
2465
2466 static inline int iommu_domain_cache_init(void)
2467 {
2468         int ret = 0;
2469
2470         iommu_domain_cache = kmem_cache_create("iommu_domain",
2471                                          sizeof(struct dmar_domain),
2472                                          0,
2473                                          SLAB_HWCACHE_ALIGN,
2474                                          NULL);
2475
2476         if (!iommu_domain_cache) {
2477                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2478                 ret = -ENOMEM;
2479         }
2480
2481         return ret;
2482 }
2483
2484 static inline int iommu_devinfo_cache_init(void)
2485 {
2486         int ret = 0;
2487
2488         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2489                                          sizeof(struct device_domain_info),
2490                                          0,
2491                                          SLAB_HWCACHE_ALIGN,
2492                                          NULL);
2493         if (!iommu_devinfo_cache) {
2494                 printk(KERN_ERR "Couldn't create devinfo cache\n");
2495                 ret = -ENOMEM;
2496         }
2497
2498         return ret;
2499 }
2500
2501 static inline int iommu_iova_cache_init(void)
2502 {
2503         int ret = 0;
2504
2505         iommu_iova_cache = kmem_cache_create("iommu_iova",
2506                                          sizeof(struct iova),
2507                                          0,
2508                                          SLAB_HWCACHE_ALIGN,
2509                                          NULL);
2510         if (!iommu_iova_cache) {
2511                 printk(KERN_ERR "Couldn't create iova cache\n");
2512                 ret = -ENOMEM;
2513         }
2514
2515         return ret;
2516 }
2517
2518 static int __init iommu_init_mempool(void)
2519 {
2520         int ret;
2521         ret = iommu_iova_cache_init();
2522         if (ret)
2523                 return ret;
2524
2525         ret = iommu_domain_cache_init();
2526         if (ret)
2527                 goto domain_error;
2528
2529         ret = iommu_devinfo_cache_init();
2530         if (!ret)
2531                 return ret;
2532
2533         kmem_cache_destroy(iommu_domain_cache);
2534 domain_error:
2535         kmem_cache_destroy(iommu_iova_cache);
2536
2537         return -ENOMEM;
2538 }
2539
2540 static void __init iommu_exit_mempool(void)
2541 {
2542         kmem_cache_destroy(iommu_devinfo_cache);
2543         kmem_cache_destroy(iommu_domain_cache);
2544         kmem_cache_destroy(iommu_iova_cache);
2545
2546 }
2547
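/*
 * init_no_remapping_devices - mark DMAR units that cover no PCI devices as
 * ignored and, unless dmar_map_gfx is set, also bypass units that cover
 * only graphics devices by tagging those devices with
 * DUMMY_DEVICE_DOMAIN_INFO.
 */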
2548 static void __init init_no_remapping_devices(void)
2549 {
2550         struct dmar_drhd_unit *drhd;
2551
2552         for_each_drhd_unit(drhd) {
2553                 if (!drhd->include_all) {
2554                         int i;
2555                         for (i = 0; i < drhd->devices_cnt; i++)
2556                                 if (drhd->devices[i] != NULL)
2557                                         break;
2558                         /* ignore DMAR unit if no pci devices exist */
2559                         if (i == drhd->devices_cnt)
2560                                 drhd->ignored = 1;
2561                 }
2562         }
2563
2564         if (dmar_map_gfx)
2565                 return;
2566
2567         for_each_drhd_unit(drhd) {
2568                 int i;
2569                 if (drhd->ignored || drhd->include_all)
2570                         continue;
2571
2572                 for (i = 0; i < drhd->devices_cnt; i++)
2573                         if (drhd->devices[i] &&
2574                                 !IS_GFX_DEVICE(drhd->devices[i]))
2575                                 break;
2576
2577                 if (i < drhd->devices_cnt)
2578                         continue;
2579
2580                 /* bypass IOMMU if it is just for gfx devices */
2581                 drhd->ignored = 1;
2582                 for (i = 0; i < drhd->devices_cnt; i++) {
2583                         if (!drhd->devices[i])
2584                                 continue;
2585                         drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2586                 }
2587         }
2588 }
2589
2590 int __init intel_iommu_init(void)
2591 {
2592         int ret = 0;
2593
2594         if (dmar_table_init())
2595                 return  -ENODEV;
2596
2597         if (dmar_dev_scope_init())
2598                 return  -ENODEV;
2599
2600         /*
2601          * Check the need for DMA-remapping initialization now.
2602          * The above initialization will also be used by interrupt remapping.
2603          */
2604         if (no_iommu || swiotlb || dmar_disabled)
2605                 return -ENODEV;
2606
2607         iommu_init_mempool();
2608         dmar_init_reserved_ranges();
2609
2610         init_no_remapping_devices();
2611
2612         ret = init_dmars();
2613         if (ret) {
2614                 printk(KERN_ERR "IOMMU: dmar init failed\n");
2615                 put_iova_domain(&reserved_iova_list);
2616                 iommu_exit_mempool();
2617                 return ret;
2618         }
2619         printk(KERN_INFO
2620         "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2621
2622         init_timer(&unmap_timer);
2623         force_iommu = 1;
2624         dma_ops = &intel_dma_ops;
2625         return 0;
2626 }
2627
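/*
 * Exported entry points for external users of the IOMMU (device
 * assignment, for example).  A minimal usage sketch, assuming the caller
 * chooses the iova, hpa and size of the mapping:
 *
 *	struct dmar_domain *domain = intel_iommu_domain_alloc(pdev);
 *
 *	if (domain &&
 *	    !intel_iommu_context_mapping(domain, pdev) &&
 *	    !intel_iommu_page_mapping(domain, iova, hpa, size,
 *				      DMA_PTE_READ | DMA_PTE_WRITE)) {
 *		... the device now translates iova to hpa ...
 *	}
 *
 *	intel_iommu_detach_dev(domain, pdev->bus->number, pdev->devfn);
 *	intel_iommu_domain_exit(domain);
 */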
2628 void intel_iommu_domain_exit(struct dmar_domain *domain)
2629 {
2630         u64 end;
2631
2632         /* Domain 0 is reserved, so don't process it */
2633         if (!domain)
2634                 return;
2635
2636         end = DOMAIN_MAX_ADDR(domain->gaw);
2637         end = end & (~VTD_PAGE_MASK);
2638
2639         /* clear ptes */
2640         dma_pte_clear_range(domain, 0, end);
2641
2642         /* free page tables */
2643         dma_pte_free_pagetable(domain, 0, end);
2644
2645         iommu_free_domain(domain);
2646         free_domain_mem(domain);
2647 }
2648 EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
2649
2650 struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
2651 {
2652         struct dmar_drhd_unit *drhd;
2653         struct dmar_domain *domain;
2654         struct intel_iommu *iommu;
2655
2656         drhd = dmar_find_matched_drhd_unit(pdev);
2657         if (!drhd) {
2658                 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2659                 return NULL;
2660         }
2661
2662         iommu = drhd->iommu;
2663         if (!iommu) {
2664                 printk(KERN_ERR
2665                         "intel_iommu_domain_alloc: iommu == NULL\n");
2666                 return NULL;
2667         }
2668         domain = iommu_alloc_domain(iommu);
2669         if (!domain) {
2670                 printk(KERN_ERR
2671                         "intel_iommu_domain_alloc: domain == NULL\n");
2672                 return NULL;
2673         }
2674         if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2675                 printk(KERN_ERR
2676                         "intel_iommu_domain_alloc: domain_init() failed\n");
2677                 intel_iommu_domain_exit(domain);
2678                 return NULL;
2679         }
2680         return domain;
2681 }
2682 EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
2683
2684 int intel_iommu_context_mapping(
2685         struct dmar_domain *domain, struct pci_dev *pdev)
2686 {
2687         int rc;
2688         rc = domain_context_mapping(domain, pdev);
2689         return rc;
2690 }
2691 EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2692
2693 int intel_iommu_page_mapping(
2694         struct dmar_domain *domain, dma_addr_t iova,
2695         u64 hpa, size_t size, int prot)
2696 {
2697         int rc;
2698         rc = domain_page_mapping(domain, iova, hpa, size, prot);
2699         return rc;
2700 }
2701 EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
2702
2703 void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
2704 {
2705         detach_domain_for_dev(domain, bus, devfn);
2706 }
2707 EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
2708
2709 struct dmar_domain *
2710 intel_iommu_find_domain(struct pci_dev *pdev)
2711 {
2712         return find_domain(pdev);
2713 }
2714 EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2715
2716 int intel_iommu_found(void)
2717 {
2718         return g_num_of_iommus;
2719 }
2720 EXPORT_SYMBOL_GPL(intel_iommu_found);
2721
2722 u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
2723 {
2724         struct dma_pte *pte;
2725         u64 pfn;
2726
2727         pfn = 0;
2728         pte = addr_to_dma_pte(domain, iova);
2729
2730         if (pte)
2731                 pfn = dma_pte_addr(pte);
2732
2733         return pfn >> VTD_PAGE_SHIFT;
2734 }
2735 EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);