x86: intel-iommu: Convert detect_intel_iommu to use iommu_init hook
drivers/pci/dmar.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
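/*
 * Rough boot-time call flow, as a sketch for orientation only (the exact
 * call sites live in arch and ACPI code outside this file, so the ordering
 * below is an assumption drawn from how these entry points are used, not a
 * definition from this file):
 *
 *	detect_intel_iommu()   - early: probes for the DMAR table and registers
 *	                         the x86_init.iommu.iommu_init hook
 *	dmar_table_init()      - parses the DMAR table into DRHD/RMRR/ATSR lists
 *	dmar_dev_scope_init()  - resolves device scopes to struct pci_dev pointers
 */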
28
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
36 #include <linux/tboot.h>
37 #include <linux/dmi.h>
38
39 #define PREFIX "DMAR: "
40
41 /* No locks are needed, as the DMA remapping hardware unit
42  * list is constructed at boot time and hotplug of
43  * these units is not supported by the architecture.
44  */
45 LIST_HEAD(dmar_drhd_units);
46
47 static struct acpi_table_header * __initdata dmar_tbl;
48 static acpi_size dmar_tbl_size;
49
50 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
51 {
52         /*
53          * Add the INCLUDE_ALL unit at the tail, so that a scan of the list
54          * will find it at the very end.
55          */
56         if (drhd->include_all)
57                 list_add_tail(&drhd->list, &dmar_drhd_units);
58         else
59                 list_add(&drhd->list, &dmar_drhd_units);
60 }
61
62 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
63                                            struct pci_dev **dev, u16 segment)
64 {
65         struct pci_bus *bus;
66         struct pci_dev *pdev = NULL;
67         struct acpi_dmar_pci_path *path;
68         int count;
69
70         bus = pci_find_bus(segment, scope->bus);
71         path = (struct acpi_dmar_pci_path *)(scope + 1);
72         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
73                 / sizeof(struct acpi_dmar_pci_path);
74
75         while (count) {
76                 if (pdev)
77                         pci_dev_put(pdev);
78                 /*
79                  * Some BIOSes list non-existent devices in the DMAR table;
80                  * just ignore them.
81                  */
82                 if (!bus) {
83                         printk(KERN_WARNING
84                         PREFIX "Device scope bus [%d] not found\n",
85                         scope->bus);
86                         break;
87                 }
88                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
89                 if (!pdev) {
90                         printk(KERN_WARNING PREFIX
91                         "Device scope device [%04x:%02x:%02x.%02x] not found\n",
92                                 segment, bus->number, path->dev, path->fn);
93                         break;
94                 }
95                 path++;
96                 count--;
97                 bus = pdev->subordinate;
98         }
99         if (!pdev) {
100                 printk(KERN_WARNING PREFIX
101                 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
102                 segment, scope->bus, path->dev, path->fn);
103                 *dev = NULL;
104                 return 0;
105         }
106         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
107                         pdev->subordinate) || (scope->entry_type ==
108                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
109                 printk(KERN_WARNING PREFIX
110                         "Device scope type does not match for %s\n",
111                         pci_name(pdev));
112                 pci_dev_put(pdev);      /* put only after the name has been printed */
113                 return -EINVAL;
114         }
115         *dev = pdev;
116         return 0;
117 }
118
119 static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
120                                        struct pci_dev ***devices, u16 segment)
121 {
122         struct acpi_dmar_device_scope *scope;
123         void * tmp = start;
124         int index;
125         int ret;
126
127         *cnt = 0;
128         while (start < end) {
129                 scope = start;
130                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
131                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
132                         (*cnt)++;
133                 else
134                         printk(KERN_WARNING PREFIX
135                                 "Unsupported device scope\n");
136                 start += scope->length;
137         }
138         if (*cnt == 0)
139                 return 0;
140
141         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
142         if (!*devices)
143                 return -ENOMEM;
144
145         start = tmp;
146         index = 0;
147         while (start < end) {
148                 scope = start;
149                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
150                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
151                         ret = dmar_parse_one_dev_scope(scope,
152                                 &(*devices)[index], segment);
153                         if (ret) {
154                                 kfree(*devices);
155                                 return ret;
156                         }
157                         index++;
158                 }
159                 start += scope->length;
160         }
161
162         return 0;
163 }
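/*
 * Layout that the scope parsers above walk, sketched from the structures
 * used in this file (field lists abbreviated; see the ACPI table headers
 * for the authoritative definitions):
 *
 *	struct acpi_dmar_device_scope  { entry_type, length, bus, ... }
 *	struct acpi_dmar_pci_path [n]  { dev, fn }
 *
 * Each scope entry is followed by a chain of (dev, fn) path elements that
 * is walked bridge by bridge from scope->bus down to the target device,
 * which is why dmar_parse_one_dev_scope() re-reads pdev->subordinate on
 * every step.
 */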
164
165 /**
166  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
167  * structure, which uniquely represents one DMA remapping hardware unit
168  * present in the platform
169  */
170 static int __init
171 dmar_parse_one_drhd(struct acpi_dmar_header *header)
172 {
173         struct acpi_dmar_hardware_unit *drhd;
174         struct dmar_drhd_unit *dmaru;
175         int ret = 0;
176
177         drhd = (struct acpi_dmar_hardware_unit *)header;
178         if (!drhd->address) {
179                 /* Promote an attitude of violence to a BIOS engineer today */
180                 WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n"
181                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
182                      dmi_get_system_info(DMI_BIOS_VENDOR),
183                      dmi_get_system_info(DMI_BIOS_VERSION),
184                      dmi_get_system_info(DMI_PRODUCT_VERSION));
185                 return -ENODEV;
186         }
187         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
188         if (!dmaru)
189                 return -ENOMEM;
190
191         dmaru->hdr = header;
192         dmaru->reg_base_addr = drhd->address;
193         dmaru->segment = drhd->segment;
194         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
195
196         ret = alloc_iommu(dmaru);
197         if (ret) {
198                 kfree(dmaru);
199                 return ret;
200         }
201         dmar_register_drhd_unit(dmaru);
202         return 0;
203 }
204
205 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
206 {
207         struct acpi_dmar_hardware_unit *drhd;
208         int ret = 0;
209
210         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
211
212         if (dmaru->include_all)
213                 return 0;
214
215         ret = dmar_parse_dev_scope((void *)(drhd + 1),
216                                 ((void *)drhd) + drhd->header.length,
217                                 &dmaru->devices_cnt, &dmaru->devices,
218                                 drhd->segment);
219         if (ret) {
220                 list_del(&dmaru->list);
221                 kfree(dmaru);
222         }
223         return ret;
224 }
225
226 #ifdef CONFIG_DMAR
227 LIST_HEAD(dmar_rmrr_units);
228
229 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
230 {
231         list_add(&rmrr->list, &dmar_rmrr_units);
232 }
233
234
235 static int __init
236 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
237 {
238         struct acpi_dmar_reserved_memory *rmrr;
239         struct dmar_rmrr_unit *rmrru;
240
241         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
242         if (!rmrru)
243                 return -ENOMEM;
244
245         rmrru->hdr = header;
246         rmrr = (struct acpi_dmar_reserved_memory *)header;
247         rmrru->base_address = rmrr->base_address;
248         rmrru->end_address = rmrr->end_address;
249
250         dmar_register_rmrr_unit(rmrru);
251         return 0;
252 }
253
254 static int __init
255 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
256 {
257         struct acpi_dmar_reserved_memory *rmrr;
258         int ret;
259
260         rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
261         ret = dmar_parse_dev_scope((void *)(rmrr + 1),
262                 ((void *)rmrr) + rmrr->header.length,
263                 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
264
265         if (ret || (rmrru->devices_cnt == 0)) {
266                 list_del(&rmrru->list);
267                 kfree(rmrru);
268         }
269         return ret;
270 }
271
272 static LIST_HEAD(dmar_atsr_units);
273
274 static int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
275 {
276         struct acpi_dmar_atsr *atsr;
277         struct dmar_atsr_unit *atsru;
278
279         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
280         atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
281         if (!atsru)
282                 return -ENOMEM;
283
284         atsru->hdr = hdr;
285         atsru->include_all = atsr->flags & 0x1;
286
287         list_add(&atsru->list, &dmar_atsr_units);
288
289         return 0;
290 }
291
292 static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
293 {
294         int rc;
295         struct acpi_dmar_atsr *atsr;
296
297         if (atsru->include_all)
298                 return 0;
299
300         atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
301         rc = dmar_parse_dev_scope((void *)(atsr + 1),
302                                 (void *)atsr + atsr->header.length,
303                                 &atsru->devices_cnt, &atsru->devices,
304                                 atsr->segment);
305         if (rc || !atsru->devices_cnt) {
306                 list_del(&atsru->list);
307                 kfree(atsru);
308         }
309
310         return rc;
311 }
312
313 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
314 {
315         int i;
316         struct pci_bus *bus;
317         struct acpi_dmar_atsr *atsr;
318         struct dmar_atsr_unit *atsru;
319
320         list_for_each_entry(atsru, &dmar_atsr_units, list) {
321                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
322                 if (atsr->segment == pci_domain_nr(dev->bus))
323                         goto found;
324         }
325
326         return 0;
327
328 found:
329         for (bus = dev->bus; bus; bus = bus->parent) {
330                 struct pci_dev *bridge = bus->self;
331
332                 if (!bridge || !bridge->is_pcie ||
333                     bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
334                         return 0;
335
336                 if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
337                         for (i = 0; i < atsru->devices_cnt; i++)
338                                 if (atsru->devices[i] == bridge)
339                                         return 1;
340                         break;
341                 }
342         }
343
344         if (atsru->include_all)
345                 return 1;
346
347         return 0;
348 }
349 #endif
350
351 static void __init
352 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
353 {
354         struct acpi_dmar_hardware_unit *drhd;
355         struct acpi_dmar_reserved_memory *rmrr;
356         struct acpi_dmar_atsr *atsr;
357         struct acpi_dmar_rhsa *rhsa;
358
359         switch (header->type) {
360         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
361                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
362                                     header);
363                 printk(KERN_INFO PREFIX
364                         "DRHD base: %#016Lx flags: %#x\n",
365                         (unsigned long long)drhd->address, drhd->flags);
366                 break;
367         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
368                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
369                                     header);
370                 printk(KERN_INFO PREFIX
371                         "RMRR base: %#016Lx end: %#016Lx\n",
372                         (unsigned long long)rmrr->base_address,
373                         (unsigned long long)rmrr->end_address);
374                 break;
375         case ACPI_DMAR_TYPE_ATSR:
376                 atsr = container_of(header, struct acpi_dmar_atsr, header);
377                 printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
378                 break;
379         case ACPI_DMAR_HARDWARE_AFFINITY:
380                 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
381                 printk(KERN_INFO PREFIX "RHSA base: %#016Lx proximity domain: %#x\n",
382                        (unsigned long long)rhsa->base_address,
383                        rhsa->proximity_domain);
384                 break;
385         }
386 }
387
388 /**
389  * dmar_table_detect - checks to see if the platform supports DMAR devices
390  */
391 static int __init dmar_table_detect(void)
392 {
393         acpi_status status = AE_OK;
394
395         /* if we can find the DMAR table, then there are DMAR devices */
396         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
397                                 (struct acpi_table_header **)&dmar_tbl,
398                                 &dmar_tbl_size);
399
400         if (ACPI_SUCCESS(status) && !dmar_tbl) {
401                 printk(KERN_WARNING PREFIX "Unable to map DMAR\n");
402                 status = AE_NOT_FOUND;
403         }
404
405         return (ACPI_SUCCESS(status) ? 1 : 0);
406 }
407
408 /**
409  * parse_dmar_table - parses the DMA reporting table
410  */
411 static int __init
412 parse_dmar_table(void)
413 {
414         struct acpi_table_dmar *dmar;
415         struct acpi_dmar_header *entry_header;
416         int ret = 0;
417
418         /*
419          * Do the detection again; the earlier dmar_tbl mapping could have
420          * been made with the fixed map.
421          */
422         dmar_table_detect();
423
424         /*
425          * ACPI tables may not be DMA protected by tboot, so use the copy of the
426          * DMAR table that SINIT saved in SinitMleData in the TXT heap (DMA protected)
427          */
428         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
429
430         dmar = (struct acpi_table_dmar *)dmar_tbl;
431         if (!dmar)
432                 return -ENODEV;
433
434         if (dmar->width < PAGE_SHIFT - 1) {
435                 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
436                 return -EINVAL;
437         }
438
439         printk(KERN_INFO PREFIX "Host address width %d\n",
440                 dmar->width + 1);
441
442         entry_header = (struct acpi_dmar_header *)(dmar + 1);
443         while (((unsigned long)entry_header) <
444                         (((unsigned long)dmar) + dmar_tbl->length)) {
445                 /* Avoid looping forever on bad ACPI tables */
446                 if (entry_header->length == 0) {
447                         printk(KERN_WARNING PREFIX
448                                 "Invalid 0-length structure\n");
449                         ret = -EINVAL;
450                         break;
451                 }
452
453                 dmar_table_print_dmar_entry(entry_header);
454
455                 switch (entry_header->type) {
456                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
457                         ret = dmar_parse_one_drhd(entry_header);
458                         break;
459                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
460 #ifdef CONFIG_DMAR
461                         ret = dmar_parse_one_rmrr(entry_header);
462 #endif
463                         break;
464                 case ACPI_DMAR_TYPE_ATSR:
465 #ifdef CONFIG_DMAR
466                         ret = dmar_parse_one_atsr(entry_header);
467 #endif
468                         break;
469                 case ACPI_DMAR_HARDWARE_AFFINITY:
470                         /* We don't do anything with RHSA (yet?) */
471                         break;
472                 default:
473                         printk(KERN_WARNING PREFIX
474                                 "Unknown DMAR structure type %d\n",
475                                 entry_header->type);
476                         ret = 0; /* for forward compatibility */
477                         break;
478                 }
479                 if (ret)
480                         break;
481
482                 entry_header = ((void *)entry_header + entry_header->length);
483         }
484         return ret;
485 }
486
487 int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
488                           struct pci_dev *dev)
489 {
490         int index;
491
492         while (dev) {
493                 for (index = 0; index < cnt; index++)
494                         if (dev == devices[index])
495                                 return 1;
496
497                 /* Check our parent */
498                 dev = dev->bus->self;
499         }
500
501         return 0;
502 }
503
504 struct dmar_drhd_unit *
505 dmar_find_matched_drhd_unit(struct pci_dev *dev)
506 {
507         struct dmar_drhd_unit *dmaru = NULL;
508         struct acpi_dmar_hardware_unit *drhd;
509
510         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
511                 drhd = container_of(dmaru->hdr,
512                                     struct acpi_dmar_hardware_unit,
513                                     header);
514
515                 if (dmaru->include_all &&
516                     drhd->segment == pci_domain_nr(dev->bus))
517                         return dmaru;
518
519                 if (dmar_pci_device_match(dmaru->devices,
520                                           dmaru->devices_cnt, dev))
521                         return dmaru;
522         }
523
524         return NULL;
525 }
526
527 int __init dmar_dev_scope_init(void)
528 {
529         struct dmar_drhd_unit *drhd, *drhd_n;
530         int ret = -ENODEV;
531
532         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
533                 ret = dmar_parse_dev(drhd);
534                 if (ret)
535                         return ret;
536         }
537
538 #ifdef CONFIG_DMAR
539         {
540                 struct dmar_rmrr_unit *rmrr, *rmrr_n;
541                 struct dmar_atsr_unit *atsr, *atsr_n;
542
543                 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
544                         ret = rmrr_parse_dev(rmrr);
545                         if (ret)
546                                 return ret;
547                 }
548
549                 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
550                         ret = atsr_parse_dev(atsr);
551                         if (ret)
552                                 return ret;
553                 }
554         }
555 #endif
556
557         return ret;
558 }
559
560
561 int __init dmar_table_init(void)
562 {
563         static int dmar_table_initialized;
564         int ret;
565
566         if (dmar_table_initialized)
567                 return 0;
568
569         dmar_table_initialized = 1;
570
571         ret = parse_dmar_table();
572         if (ret) {
573                 if (ret != -ENODEV)
574                         printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
575                 return ret;
576         }
577
578         if (list_empty(&dmar_drhd_units)) {
579                 printk(KERN_INFO PREFIX "No DMAR devices found\n");
580                 return -ENODEV;
581         }
582
583 #ifdef CONFIG_DMAR
584         if (list_empty(&dmar_rmrr_units))
585                 printk(KERN_INFO PREFIX "No RMRR found\n");
586
587         if (list_empty(&dmar_atsr_units))
588                 printk(KERN_INFO PREFIX "No ATSR found\n");
589 #endif
590
591         return 0;
592 }
593
594 void __init detect_intel_iommu(void)
595 {
596         int ret;
597
598         ret = dmar_table_detect();
599
600         {
601 #ifdef CONFIG_INTR_REMAP
602                 struct acpi_table_dmar *dmar;
603                 /*
604                  * For now we disable DMA remapping when interrupt
605                  * remapping is enabled.
606                  * Once support for queued invalidation of the IOTLB
607                  * is added, this will no longer be needed.
608                  */
609                 dmar = (struct acpi_table_dmar *) dmar_tbl;
610                 if (ret && cpu_has_x2apic && dmar->flags & 0x1)
611                         printk(KERN_INFO
612                                "Queued invalidation will be enabled to support "
613                                "x2apic and Intr-remapping.\n");
614 #endif
615 #ifdef CONFIG_DMAR
616                 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
617                     !dmar_disabled)
618                         iommu_detected = 1;
619 #endif
620 #ifdef CONFIG_X86
621                 if (ret)
622                         x86_init.iommu.iommu_init = intel_iommu_init;
623 #endif
624         }
625         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
626         dmar_tbl = NULL;
627 }
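/*
 * With the hook registered above, the architecture code no longer calls
 * intel_iommu_init() directly; something along these lines runs later in
 * boot (a sketch only -- the caller name pci_iommu_init() is an assumption
 * about the arch/x86 code, not something defined in this file):
 *
 *	static int __init pci_iommu_init(void)
 *	{
 *		x86_init.iommu.iommu_init();
 *		return 0;
 *	}
 */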
628
629
630 int alloc_iommu(struct dmar_drhd_unit *drhd)
631 {
632         struct intel_iommu *iommu;
633         int map_size;
634         u32 ver;
635         static int iommu_allocated = 0;
636         int agaw = 0;
637         int msagaw = 0;
638
639         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
640         if (!iommu)
641                 return -ENOMEM;
642
643         iommu->seq_id = iommu_allocated++;
644         sprintf(iommu->name, "dmar%d", iommu->seq_id);
645
646         iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
647         if (!iommu->reg) {
648                 printk(KERN_ERR "IOMMU: can't map the region\n");
649                 goto error;
650         }
651         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
652         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
653
654         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
655                 /* Promote an attitude of violence to a BIOS engineer today */
656                 WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n"
657                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
658                      drhd->reg_base_addr,
659                      dmi_get_system_info(DMI_BIOS_VENDOR),
660                      dmi_get_system_info(DMI_BIOS_VERSION),
661                      dmi_get_system_info(DMI_PRODUCT_VERSION));
662                 goto err_unmap;
663         }
664
665 #ifdef CONFIG_DMAR
666         agaw = iommu_calculate_agaw(iommu);
667         if (agaw < 0) {
668                 printk(KERN_ERR
669                        "Cannot get a valid agaw for iommu (seq_id = %d)\n",
670                        iommu->seq_id);
671                 goto err_unmap;
672         }
673         msagaw = iommu_calculate_max_sagaw(iommu);
674         if (msagaw < 0) {
675                 printk(KERN_ERR
676                         "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
677                         iommu->seq_id);
678                 goto err_unmap;
679         }
680 #endif
681         iommu->agaw = agaw;
682         iommu->msagaw = msagaw;
683
684         /* the register set might span more than one page */
685         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
686                 cap_max_fault_reg_offset(iommu->cap));
687         map_size = VTD_PAGE_ALIGN(map_size);
688         if (map_size > VTD_PAGE_SIZE) {
689                 iounmap(iommu->reg);
690                 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
691                 if (!iommu->reg) {
692                         printk(KERN_ERR "IOMMU: can't map the region\n");
693                         goto error;
694                 }
695         }
696
697         ver = readl(iommu->reg + DMAR_VER_REG);
698         pr_info("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
699                 (unsigned long long)drhd->reg_base_addr,
700                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
701                 (unsigned long long)iommu->cap,
702                 (unsigned long long)iommu->ecap);
703
704         spin_lock_init(&iommu->register_lock);
705
706         drhd->iommu = iommu;
707         return 0;
708
709  err_unmap:
710         iounmap(iommu->reg);
711  error:
712         kfree(iommu);
713         return -1;
714 }
715
716 void free_iommu(struct intel_iommu *iommu)
717 {
718         if (!iommu)
719                 return;
720
721 #ifdef CONFIG_DMAR
722         free_dmar_iommu(iommu);
723 #endif
724
725         if (iommu->reg)
726                 iounmap(iommu->reg);
727         kfree(iommu);
728 }
729
730 /*
731  * Reclaim all the submitted descriptors which have completed their work.
732  */
733 static inline void reclaim_free_desc(struct q_inval *qi)
734 {
735         while (qi->desc_status[qi->free_tail] == QI_DONE ||
736                qi->desc_status[qi->free_tail] == QI_ABORT) {
737                 qi->desc_status[qi->free_tail] = QI_FREE;
738                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
739                 qi->free_cnt++;
740         }
741 }
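/*
 * Queued-invalidation bookkeeping used above and in qi_submit_sync(),
 * summarized from how the fields are used in this file (struct q_inval
 * itself is defined in include/linux/intel-iommu.h):
 *
 *	qi->desc[QI_LENGTH]        - the hardware-visible descriptor ring
 *	qi->desc_status[QI_LENGTH] - per-slot state: QI_FREE/QI_IN_USE/QI_DONE/QI_ABORT
 *	qi->free_head              - slot where the next descriptor pair is inserted
 *	qi->free_tail              - slot from which completed descriptors are reclaimed
 *	qi->free_cnt               - number of free slots left in the ring
 *
 * Descriptors are always submitted in pairs (work descriptor + wait
 * descriptor), which is why qi_submit_sync() advances free_head by two.
 */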
742
743 static int qi_check_fault(struct intel_iommu *iommu, int index)
744 {
745         u32 fault;
746         int head, tail;
747         struct q_inval *qi = iommu->qi;
748         int wait_index = (index + 1) % QI_LENGTH;
749
750         if (qi->desc_status[wait_index] == QI_ABORT)
751                 return -EAGAIN;
752
753         fault = readl(iommu->reg + DMAR_FSTS_REG);
754
755         /*
756          * If IQE happens, the head points to the descriptor associated
757          * with the error. No new descriptors are fetched until the IQE
758          * is cleared.
759          */
760         if (fault & DMA_FSTS_IQE) {
761                 head = readl(iommu->reg + DMAR_IQH_REG);
762                 if ((head >> DMAR_IQ_SHIFT) == index) {
763                         printk(KERN_ERR "VT-d detected invalid descriptor: "
764                                 "low=%llx, high=%llx\n",
765                                 (unsigned long long)qi->desc[index].low,
766                                 (unsigned long long)qi->desc[index].high);
767                         memcpy(&qi->desc[index], &qi->desc[wait_index],
768                                         sizeof(struct qi_desc));
769                         __iommu_flush_cache(iommu, &qi->desc[index],
770                                         sizeof(struct qi_desc));
771                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
772                         return -EINVAL;
773                 }
774         }
775
776         /*
777          * If ITE happens, all pending wait_desc commands are aborted.
778          * No new descriptors are fetched until the ITE is cleared.
779          */
780         if (fault & DMA_FSTS_ITE) {
781                 head = readl(iommu->reg + DMAR_IQH_REG);
782                 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
783                 head |= 1;
784                 tail = readl(iommu->reg + DMAR_IQT_REG);
785                 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
786
787                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
788
789                 do {
790                         if (qi->desc_status[head] == QI_IN_USE)
791                                 qi->desc_status[head] = QI_ABORT;
792                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
793                 } while (head != tail);
794
795                 if (qi->desc_status[wait_index] == QI_ABORT)
796                         return -EAGAIN;
797         }
798
799         if (fault & DMA_FSTS_ICE)
800                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
801
802         return 0;
803 }
804
805 /*
806  * Submit the queued invalidation descriptor to the remapping
807  * hardware unit and wait for its completion.
808  */
809 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
810 {
811         int rc;
812         struct q_inval *qi = iommu->qi;
813         struct qi_desc *hw, wait_desc;
814         int wait_index, index;
815         unsigned long flags;
816
817         if (!qi)
818                 return 0;
819
820         hw = qi->desc;
821
822 restart:
823         rc = 0;
824
825         spin_lock_irqsave(&qi->q_lock, flags);
826         while (qi->free_cnt < 3) {
827                 spin_unlock_irqrestore(&qi->q_lock, flags);
828                 cpu_relax();
829                 spin_lock_irqsave(&qi->q_lock, flags);
830         }
831
832         index = qi->free_head;
833         wait_index = (index + 1) % QI_LENGTH;
834
835         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
836
837         hw[index] = *desc;
838
839         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
840                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
841         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
842
843         hw[wait_index] = wait_desc;
844
845         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
846         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
847
848         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
849         qi->free_cnt -= 2;
850
851         /*
852          * update the HW tail register indicating the presence of
853          * new descriptors.
854          */
855         writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
856
857         while (qi->desc_status[wait_index] != QI_DONE) {
858                 /*
859                  * We leave interrupts disabled to prevent the interrupt
860                  * context from queueing another command while one is already
861                  * submitted and waiting for completion on this CPU. This avoids
862                  * a deadlock where the interrupt context could wait indefinitely
863                  * for free slots in the queue.
864                  */
865                 rc = qi_check_fault(iommu, index);
866                 if (rc)
867                         break;
868
869                 spin_unlock(&qi->q_lock);
870                 cpu_relax();
871                 spin_lock(&qi->q_lock);
872         }
873
874         qi->desc_status[index] = QI_DONE;
875
876         reclaim_free_desc(qi);
877         spin_unlock_irqrestore(&qi->q_lock, flags);
878
879         if (rc == -EAGAIN)
880                 goto restart;
881
882         return rc;
883 }
884
885 /*
886  * Flush the global interrupt entry cache.
887  */
888 void qi_global_iec(struct intel_iommu *iommu)
889 {
890         struct qi_desc desc;
891
892         desc.low = QI_IEC_TYPE;
893         desc.high = 0;
894
895         /* should never fail */
896         qi_submit_sync(&desc, iommu);
897 }
898
899 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
900                       u64 type)
901 {
902         struct qi_desc desc;
903
904         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
905                         | QI_CC_GRAN(type) | QI_CC_TYPE;
906         desc.high = 0;
907
908         qi_submit_sync(&desc, iommu);
909 }
910
911 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
912                     unsigned int size_order, u64 type)
913 {
914         u8 dw = 0, dr = 0;
915
916         struct qi_desc desc;
917         int ih = 0;
918
919         if (cap_write_drain(iommu->cap))
920                 dw = 1;
921
922         if (cap_read_drain(iommu->cap))
923                 dr = 1;
924
925         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
926                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
927         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
928                 | QI_IOTLB_AM(size_order);
929
930         qi_submit_sync(&desc, iommu);
931 }
932
933 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
934                         u64 addr, unsigned mask)
935 {
936         struct qi_desc desc;
937
938         if (mask) {
939                 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
940                 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
941                 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
942         } else
943                 desc.high = QI_DEV_IOTLB_ADDR(addr);
944
945         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
946                 qdep = 0;
947
948         desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
949                    QI_DIOTLB_TYPE;
950
951         qi_submit_sync(&desc, iommu);
952 }
953
954 /*
955  * Disable Queued Invalidation interface.
956  */
957 void dmar_disable_qi(struct intel_iommu *iommu)
958 {
959         unsigned long flags;
960         u32 sts;
961         cycles_t start_time = get_cycles();
962
963         if (!ecap_qis(iommu->ecap))
964                 return;
965
966         spin_lock_irqsave(&iommu->register_lock, flags);
967
968         sts =  dmar_readq(iommu->reg + DMAR_GSTS_REG);
969         if (!(sts & DMA_GSTS_QIES))
970                 goto end;
971
972         /*
973          * Give the HW a chance to complete the pending invalidation requests.
974          */
975         while ((readl(iommu->reg + DMAR_IQT_REG) !=
976                 readl(iommu->reg + DMAR_IQH_REG)) &&
977                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
978                 cpu_relax();
979
980         iommu->gcmd &= ~DMA_GCMD_QIE;
981         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
982
983         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
984                       !(sts & DMA_GSTS_QIES), sts);
985 end:
986         spin_unlock_irqrestore(&iommu->register_lock, flags);
987 }
988
989 /*
990  * Enable queued invalidation.
991  */
992 static void __dmar_enable_qi(struct intel_iommu *iommu)
993 {
994         u32 sts;
995         unsigned long flags;
996         struct q_inval *qi = iommu->qi;
997
998         qi->free_head = qi->free_tail = 0;
999         qi->free_cnt = QI_LENGTH;
1000
1001         spin_lock_irqsave(&iommu->register_lock, flags);
1002
1003         /* write zero to the tail reg */
1004         writel(0, iommu->reg + DMAR_IQT_REG);
1005
1006         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
1007
1008         iommu->gcmd |= DMA_GCMD_QIE;
1009         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1010
1011         /* Make sure hardware complete it */
1012         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1013
1014         spin_unlock_irqrestore(&iommu->register_lock, flags);
1015 }
1016
1017 /*
1018  * Enable Queued Invalidation interface. This is a must to support
1019  * interrupt-remapping. Also used by DMA-remapping, which replaces
1020  * register based IOTLB invalidation.
1021  */
1022 int dmar_enable_qi(struct intel_iommu *iommu)
1023 {
1024         struct q_inval *qi;
1025
1026         if (!ecap_qis(iommu->ecap))
1027                 return -ENOENT;
1028
1029         /*
1030          * Queued invalidation is already set up and enabled.
1031          */
1032         if (iommu->qi)
1033                 return 0;
1034
1035         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1036         if (!iommu->qi)
1037                 return -ENOMEM;
1038
1039         qi = iommu->qi;
1040
1041         qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
1042         if (!qi->desc) {
1043                 kfree(qi);
1044                 iommu->qi = NULL;
1045                 return -ENOMEM;
1046         }
1047
1048         qi->desc_status = kzalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC); /* all slots start out QI_FREE */
1049         if (!qi->desc_status) {
1050                 free_page((unsigned long) qi->desc);
1051                 kfree(qi);
1052                 iommu->qi = NULL;
1053                 return -ENOMEM;
1054         }
1055
1056         qi->free_head = qi->free_tail = 0;
1057         qi->free_cnt = QI_LENGTH;
1058
1059         spin_lock_init(&qi->q_lock);
1060
1061         __dmar_enable_qi(iommu);
1062
1063         return 0;
1064 }
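/*
 * Typical use, sketched from how the queued-invalidation API in this file
 * fits together (the surrounding error handling is illustrative, not the
 * exact code of any particular caller):
 *
 *	if (dmar_enable_qi(iommu)) {
 *		printk(KERN_ERR "QI enabling failed on %s\n", iommu->name);
 *		return -1;
 *	}
 *	qi_global_iec(iommu);	-- flush the interrupt entry cache via QI
 */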
1065
1066 /* iommu interrupt handling. Most of it is MSI-like. */
1067
1068 enum faulttype {
1069         DMA_REMAP,
1070         INTR_REMAP,
1071         UNKNOWN,
1072 };
1073
1074 static const char *dma_remap_fault_reasons[] =
1075 {
1076         "Software",
1077         "Present bit in root entry is clear",
1078         "Present bit in context entry is clear",
1079         "Invalid context entry",
1080         "Access beyond MGAW",
1081         "PTE Write access is not set",
1082         "PTE Read access is not set",
1083         "Next page table ptr is invalid",
1084         "Root table address invalid",
1085         "Context table ptr is invalid",
1086         "non-zero reserved fields in RTP",
1087         "non-zero reserved fields in CTP",
1088         "non-zero reserved fields in PTE",
1089 };
1090
1091 static const char *intr_remap_fault_reasons[] =
1092 {
1093         "Detected reserved fields in the decoded interrupt-remapped request",
1094         "Interrupt index exceeded the interrupt-remapping table size",
1095         "Present field in the IRTE entry is clear",
1096         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1097         "Detected reserved fields in the IRTE entry",
1098         "Blocked a compatibility format interrupt request",
1099         "Blocked an interrupt request due to source-id verification failure",
1100 };
1101
1102 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(dma_remap_fault_reasons) - 1)
1103
1104 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1105 {
1106         if (fault_reason >= 0x20 && (fault_reason < 0x20 +
1107                                      ARRAY_SIZE(intr_remap_fault_reasons))) {
1108                 *fault_type = INTR_REMAP;
1109                 return intr_remap_fault_reasons[fault_reason - 0x20];
1110         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1111                 *fault_type = DMA_REMAP;
1112                 return dma_remap_fault_reasons[fault_reason];
1113         } else {
1114                 *fault_type = UNKNOWN;
1115                 return "Unknown";
1116         }
1117 }
1118
1119 void dmar_msi_unmask(unsigned int irq)
1120 {
1121         struct intel_iommu *iommu = get_irq_data(irq);
1122         unsigned long flag;
1123
1124         /* unmask it */
1125         spin_lock_irqsave(&iommu->register_lock, flag);
1126         writel(0, iommu->reg + DMAR_FECTL_REG);
1127         /* Read a reg to force flush the post write */
1128         readl(iommu->reg + DMAR_FECTL_REG);
1129         spin_unlock_irqrestore(&iommu->register_lock, flag);
1130 }
1131
1132 void dmar_msi_mask(unsigned int irq)
1133 {
1134         unsigned long flag;
1135         struct intel_iommu *iommu = get_irq_data(irq);
1136
1137         /* mask it */
1138         spin_lock_irqsave(&iommu->register_lock, flag);
1139         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1140         /* Read a reg to force flush the post write */
1141         readl(iommu->reg + DMAR_FECTL_REG);
1142         spin_unlock_irqrestore(&iommu->register_lock, flag);
1143 }
1144
1145 void dmar_msi_write(int irq, struct msi_msg *msg)
1146 {
1147         struct intel_iommu *iommu = get_irq_data(irq);
1148         unsigned long flag;
1149
1150         spin_lock_irqsave(&iommu->register_lock, flag);
1151         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1152         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1153         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1154         spin_unlock_irqrestore(&iommu->register_lock, flag);
1155 }
1156
1157 void dmar_msi_read(int irq, struct msi_msg *msg)
1158 {
1159         struct intel_iommu *iommu = get_irq_data(irq);
1160         unsigned long flag;
1161
1162         spin_lock_irqsave(&iommu->register_lock, flag);
1163         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1164         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1165         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1166         spin_unlock_irqrestore(&iommu->register_lock, flag);
1167 }
1168
1169 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1170                 u8 fault_reason, u16 source_id, unsigned long long addr)
1171 {
1172         const char *reason;
1173         int fault_type;
1174
1175         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1176
1177         if (fault_type == INTR_REMAP)
1178                 printk(KERN_ERR "INTR-REMAP: Request device [%02x:%02x.%d] "
1179                        "fault index %llx\n"
1180                         "INTR-REMAP:[fault reason %02d] %s\n",
1181                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1182                         PCI_FUNC(source_id & 0xFF), addr >> 48,
1183                         fault_reason, reason);
1184         else
1185                 printk(KERN_ERR
1186                        "DMAR:[%s] Request device [%02x:%02x.%d] "
1187                        "fault addr %llx\n"
1188                        "DMAR:[fault reason %02d] %s\n",
1189                        (type ? "DMA Read" : "DMA Write"),
1190                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1191                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1192         return 0;
1193 }
1194
1195 #define PRIMARY_FAULT_REG_LEN (16)
1196 irqreturn_t dmar_fault(int irq, void *dev_id)
1197 {
1198         struct intel_iommu *iommu = dev_id;
1199         int reg, fault_index;
1200         u32 fault_status;
1201         unsigned long flag;
1202
1203         spin_lock_irqsave(&iommu->register_lock, flag);
1204         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1205         if (fault_status)
1206                 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1207                        fault_status);
1208
1209         /* TBD: ignore advanced fault log currently */
1210         if (!(fault_status & DMA_FSTS_PPF))
1211                 goto clear_rest;
1212
1213         fault_index = dma_fsts_fault_record_index(fault_status);
1214         reg = cap_fault_reg_offset(iommu->cap);
1215         while (1) {
1216                 u8 fault_reason;
1217                 u16 source_id;
1218                 u64 guest_addr;
1219                 int type;
1220                 u32 data;
1221
1222                 /* highest 32 bits */
1223                 data = readl(iommu->reg + reg +
1224                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1225                 if (!(data & DMA_FRCD_F))
1226                         break;
1227
1228                 fault_reason = dma_frcd_fault_reason(data);
1229                 type = dma_frcd_type(data);
1230
1231                 data = readl(iommu->reg + reg +
1232                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1233                 source_id = dma_frcd_source_id(data);
1234
1235                 guest_addr = dmar_readq(iommu->reg + reg +
1236                                 fault_index * PRIMARY_FAULT_REG_LEN);
1237                 guest_addr = dma_frcd_page_addr(guest_addr);
1238                 /* clear the fault */
1239                 writel(DMA_FRCD_F, iommu->reg + reg +
1240                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1241
1242                 spin_unlock_irqrestore(&iommu->register_lock, flag);
1243
1244                 dmar_fault_do_one(iommu, type, fault_reason,
1245                                 source_id, guest_addr);
1246
1247                 fault_index++;
1248                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1249                         fault_index = 0;
1250                 spin_lock_irqsave(&iommu->register_lock, flag);
1251         }
1252 clear_rest:
1253         /* clear all the other faults */
1254         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1255         writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1256
1257         spin_unlock_irqrestore(&iommu->register_lock, flag);
1258         return IRQ_HANDLED;
1259 }
1260
1261 int dmar_set_interrupt(struct intel_iommu *iommu)
1262 {
1263         int irq, ret;
1264
1265         /*
1266          * Check if the fault interrupt is already initialized.
1267          */
1268         if (iommu->irq)
1269                 return 0;
1270
1271         irq = create_irq();
1272         if (!irq) {
1273                 printk(KERN_ERR "IOMMU: no free vectors\n");
1274                 return -EINVAL;
1275         }
1276
1277         set_irq_data(irq, iommu);
1278         iommu->irq = irq;
1279
1280         ret = arch_setup_dmar_msi(irq);
1281         if (ret) {
1282                 set_irq_data(irq, NULL);
1283                 iommu->irq = 0;
1284                 destroy_irq(irq);
1285                 return ret;
1286         }
1287
1288         ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
1289         if (ret)
1290                 printk(KERN_ERR "IOMMU: can't request irq\n");
1291         return ret;
1292 }
1293
1294 int __init enable_drhd_fault_handling(void)
1295 {
1296         struct dmar_drhd_unit *drhd;
1297
1298         /*
1299          * Enable fault control interrupt.
1300          */
1301         for_each_drhd_unit(drhd) {
1302                 int ret;
1303                 struct intel_iommu *iommu = drhd->iommu;
1304                 ret = dmar_set_interrupt(iommu);
1305
1306                 if (ret) {
1307                         printk(KERN_ERR "DRHD %Lx: failed to enable fault"
1308                                " interrupt, ret %d\n",
1309                                (unsigned long long)drhd->reg_base_addr, ret);
1310                         return -1;
1311                 }
1312         }
1313
1314         return 0;
1315 }
1316
1317 /*
1318  * Re-enable Queued Invalidation interface.
1319  */
1320 int dmar_reenable_qi(struct intel_iommu *iommu)
1321 {
1322         if (!ecap_qis(iommu->ecap))
1323                 return -ENOENT;
1324
1325         if (!iommu->qi)
1326                 return -ENOENT;
1327
1328         /*
1329          * First disable queued invalidation.
1330          */
1331         dmar_disable_qi(iommu);
1332         /*
1333          * Then enable queued invalidation again. Since there is no pending
1334          * invalidation requests now, it's safe to re-enable queued
1335          * invalidation.
1336          */
1337         __dmar_enable_qi(iommu);
1338
1339         return 0;
1340 }
1341
1342 /*
1343  * Check interrupt remapping support in DMAR table description.
1344  */
1345 int dmar_ir_support(void)
1346 {
1347         struct acpi_table_dmar *dmar;
1348         dmar = (struct acpi_table_dmar *)dmar_tbl;
1349         /* the table may be absent; don't dereference a NULL pointer */
1350         if (!dmar)
1351                 return 0;
1352         return dmar->flags & 0x1;
1353 }