intel-iommu: Make "Unknown DMAR structure" message more informative
drivers/pci/dmar.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to the OS through the BIOS via the DMA remapping reporting
24  * (DMAR) ACPI tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping.
27  */
28
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
36 #include <linux/tboot.h>
37 #include <linux/dmi.h>
38
39 #define PREFIX "DMAR: "
40
41 /* No locks are needed as the DMA remapping hardware unit
42  * list is constructed at boot time and hotplug of
43  * these units is not supported by the architecture.
44  */
45 LIST_HEAD(dmar_drhd_units);
46
47 static struct acpi_table_header * __initdata dmar_tbl;
48 static acpi_size dmar_tbl_size;
49
50 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
51 {
52         /*
53          * add INCLUDE_ALL at the tail, so a scan of the list will find it at
54          * the very end.
55          */
56         if (drhd->include_all)
57                 list_add_tail(&drhd->list, &dmar_drhd_units);
58         else
59                 list_add(&drhd->list, &dmar_drhd_units);
60 }
61
62 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
63                                            struct pci_dev **dev, u16 segment)
64 {
65         struct pci_bus *bus;
66         struct pci_dev *pdev = NULL;
67         struct acpi_dmar_pci_path *path;
68         int count;
69
70         bus = pci_find_bus(segment, scope->bus);
71         path = (struct acpi_dmar_pci_path *)(scope + 1);
72         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
73                 / sizeof(struct acpi_dmar_pci_path);
74
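        /*
         * Walk each bus/device/function step in the scope's PCI path,
         * descending one bus level per entry, until we hold a reference
         * to the device this scope entry describes.
         */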
75         while (count) {
76                 if (pdev)
77                         pci_dev_put(pdev);
78                 /*
79                  * Some BIOSes list non-existent devices in the DMAR table;
80                  * just ignore them.
81                  */
82                 if (!bus) {
83                         printk(KERN_WARNING
84                         PREFIX "Device scope bus [%d] not found\n",
85                         scope->bus);
86                         break;
87                 }
88                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
89                 if (!pdev) {
90                         printk(KERN_WARNING PREFIX
91                         "Device scope device [%04x:%02x:%02x.%02x] not found\n",
92                                 segment, bus->number, path->dev, path->fn);
93                         break;
94                 }
95                 path ++;
96                 count --;
97                 bus = pdev->subordinate;
98         }
99         if (!pdev) {
100                 printk(KERN_WARNING PREFIX
101                 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
102                 segment, scope->bus, path->dev, path->fn);
103                 *dev = NULL;
104                 return 0;
105         }
106         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \
107                         pdev->subordinate) || (scope->entry_type == \
108                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
109                 printk(KERN_WARNING PREFIX
110                         "Device scope type does not match for %s\n",
111                         pci_name(pdev));
112                 pci_dev_put(pdev);
113                 return -EINVAL;
114         }
115         *dev = pdev;
116         return 0;
117 }
118
119 static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
120                                        struct pci_dev ***devices, u16 segment)
121 {
122         struct acpi_dmar_device_scope *scope;
123         void * tmp = start;
124         int index;
125         int ret;
126
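        /*
         * Two passes over the scope entries: first count the endpoint and
         * bridge entries so the device array can be sized, then resolve
         * each entry to a struct pci_dev in the second loop below.
         */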
127         *cnt = 0;
128         while (start < end) {
129                 scope = start;
130                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
131                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
132                         (*cnt)++;
133                 else
134                         printk(KERN_WARNING PREFIX
135                                 "Unsupported device scope\n");
136                 start += scope->length;
137         }
138         if (*cnt == 0)
139                 return 0;
140
141         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
142         if (!*devices)
143                 return -ENOMEM;
144
145         start = tmp;
146         index = 0;
147         while (start < end) {
148                 scope = start;
149                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
150                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
151                         ret = dmar_parse_one_dev_scope(scope,
152                                 &(*devices)[index], segment);
153                         if (ret) {
154                                 kfree(*devices);
155                                 return ret;
156                         }
157                         index ++;
158                 }
159                 start += scope->length;
160         }
161
162         return 0;
163 }
164
165 /**
166  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
167  * structure which uniquely represents one DMA remapping hardware unit
168  * present in the platform.
169  */
170 static int __init
171 dmar_parse_one_drhd(struct acpi_dmar_header *header)
172 {
173         struct acpi_dmar_hardware_unit *drhd;
174         struct dmar_drhd_unit *dmaru;
175         int ret = 0;
176
177         drhd = (struct acpi_dmar_hardware_unit *)header;
178         if (!drhd->address) {
179                 /* Promote an attitude of violence to a BIOS engineer today */
180                 WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n"
181                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
182                      dmi_get_system_info(DMI_BIOS_VENDOR),
183                      dmi_get_system_info(DMI_BIOS_VERSION),
184                      dmi_get_system_info(DMI_PRODUCT_VERSION));
185                 return -ENODEV;
186         }
187         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
188         if (!dmaru)
189                 return -ENOMEM;
190
191         dmaru->hdr = header;
192         dmaru->reg_base_addr = drhd->address;
193         dmaru->segment = drhd->segment;
194         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
195
196         ret = alloc_iommu(dmaru);
197         if (ret) {
198                 kfree(dmaru);
199                 return ret;
200         }
201         dmar_register_drhd_unit(dmaru);
202         return 0;
203 }
204
205 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
206 {
207         struct acpi_dmar_hardware_unit *drhd;
208         int ret = 0;
209
210         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
211
212         if (dmaru->include_all)
213                 return 0;
214
215         ret = dmar_parse_dev_scope((void *)(drhd + 1),
216                                 ((void *)drhd) + drhd->header.length,
217                                 &dmaru->devices_cnt, &dmaru->devices,
218                                 drhd->segment);
219         if (ret) {
220                 list_del(&dmaru->list);
221                 kfree(dmaru);
222         }
223         return ret;
224 }
225
226 #ifdef CONFIG_DMAR
227 LIST_HEAD(dmar_rmrr_units);
228
229 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
230 {
231         list_add(&rmrr->list, &dmar_rmrr_units);
232 }
233
234
235 static int __init
236 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
237 {
238         struct acpi_dmar_reserved_memory *rmrr;
239         struct dmar_rmrr_unit *rmrru;
240
241         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
242         if (!rmrru)
243                 return -ENOMEM;
244
245         rmrru->hdr = header;
246         rmrr = (struct acpi_dmar_reserved_memory *)header;
247         rmrru->base_address = rmrr->base_address;
248         rmrru->end_address = rmrr->end_address;
249
250         dmar_register_rmrr_unit(rmrru);
251         return 0;
252 }
253
254 static int __init
255 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
256 {
257         struct acpi_dmar_reserved_memory *rmrr;
258         int ret;
259
260         rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
261         ret = dmar_parse_dev_scope((void *)(rmrr + 1),
262                 ((void *)rmrr) + rmrr->header.length,
263                 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
264
265         if (ret || (rmrru->devices_cnt == 0)) {
266                 list_del(&rmrru->list);
267                 kfree(rmrru);
268         }
269         return ret;
270 }
271
272 static LIST_HEAD(dmar_atsr_units);
273
274 static int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
275 {
276         struct acpi_dmar_atsr *atsr;
277         struct dmar_atsr_unit *atsru;
278
279         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
280         atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
281         if (!atsru)
282                 return -ENOMEM;
283
284         atsru->hdr = hdr;
285         atsru->include_all = atsr->flags & 0x1;
286
287         list_add(&atsru->list, &dmar_atsr_units);
288
289         return 0;
290 }
291
292 static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
293 {
294         int rc;
295         struct acpi_dmar_atsr *atsr;
296
297         if (atsru->include_all)
298                 return 0;
299
300         atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
301         rc = dmar_parse_dev_scope((void *)(atsr + 1),
302                                 (void *)atsr + atsr->header.length,
303                                 &atsru->devices_cnt, &atsru->devices,
304                                 atsr->segment);
305         if (rc || !atsru->devices_cnt) {
306                 list_del(&atsru->list);
307                 kfree(atsru);
308         }
309
310         return rc;
311 }
312
313 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
314 {
315         int i;
316         struct pci_bus *bus;
317         struct acpi_dmar_atsr *atsr;
318         struct dmar_atsr_unit *atsru;
319
320         list_for_each_entry(atsru, &dmar_atsr_units, list) {
321                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
322                 if (atsr->segment == pci_domain_nr(dev->bus))
323                         goto found;
324         }
325
326         return 0;
327
328 found:
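        /*
         * Walk up from the device towards its PCIe root port; the device
         * is covered by this ATSR if that root port appears in the ATSR
         * device scope, or if the ATSR has INCLUDE_ALL set.
         */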
329         for (bus = dev->bus; bus; bus = bus->parent) {
330                 struct pci_dev *bridge = bus->self;
331
332                 if (!bridge || !bridge->is_pcie ||
333                     bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
334                         return 0;
335
336                 if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
337                         for (i = 0; i < atsru->devices_cnt; i++)
338                                 if (atsru->devices[i] == bridge)
339                                         return 1;
340                         break;
341                 }
342         }
343
344         if (atsru->include_all)
345                 return 1;
346
347         return 0;
348 }
349 #endif
350
351 static void __init
352 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
353 {
354         struct acpi_dmar_hardware_unit *drhd;
355         struct acpi_dmar_reserved_memory *rmrr;
356         struct acpi_dmar_atsr *atsr;
357
358         switch (header->type) {
359         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
360                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
361                                     header);
362                 printk (KERN_INFO PREFIX
363                         "DRHD base: %#016Lx flags: %#x\n",
364                         (unsigned long long)drhd->address, drhd->flags);
365                 break;
366         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
367                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
368                                     header);
369                 printk (KERN_INFO PREFIX
370                         "RMRR base: %#016Lx end: %#016Lx\n",
371                         (unsigned long long)rmrr->base_address,
372                         (unsigned long long)rmrr->end_address);
373                 break;
374         case ACPI_DMAR_TYPE_ATSR:
375                 atsr = container_of(header, struct acpi_dmar_atsr, header);
376                 printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
377                 break;
378         }
379 }
380
381 /**
382  * dmar_table_detect - checks to see if the platform supports DMAR devices
383  */
384 static int __init dmar_table_detect(void)
385 {
386         acpi_status status = AE_OK;
387
388         /* if we can find the DMAR table, then there are DMAR devices */
389         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
390                                 (struct acpi_table_header **)&dmar_tbl,
391                                 &dmar_tbl_size);
392
393         if (ACPI_SUCCESS(status) && !dmar_tbl) {
394                 printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
395                 status = AE_NOT_FOUND;
396         }
397
398         return (ACPI_SUCCESS(status) ? 1 : 0);
399 }
400
401 /**
402  * parse_dmar_table - parses the DMA reporting table
403  */
404 static int __init
405 parse_dmar_table(void)
406 {
407         struct acpi_table_dmar *dmar;
408         struct acpi_dmar_header *entry_header;
409         int ret = 0;
410
411         /*
412          * Do it again; the earlier dmar_tbl mapping could have been
413          * created with the fixed map.
414          */
415         dmar_table_detect();
416
417         /*
418          * ACPI tables may not be DMA protected by tboot, so use the DMAR copy
419          * that SINIT saved in SinitMleData in the TXT heap (which is DMA protected)
420          */
421         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
422
423         dmar = (struct acpi_table_dmar *)dmar_tbl;
424         if (!dmar)
425                 return -ENODEV;
426
427         if (dmar->width < PAGE_SHIFT - 1) {
428                 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
429                 return -EINVAL;
430         }
431
432         printk (KERN_INFO PREFIX "Host address width %d\n",
433                 dmar->width + 1);
434
435         entry_header = (struct acpi_dmar_header *)(dmar + 1);
436         while (((unsigned long)entry_header) <
437                         (((unsigned long)dmar) + dmar_tbl->length)) {
438                 /* Avoid looping forever on bad ACPI tables */
439                 if (entry_header->length == 0) {
440                         printk(KERN_WARNING PREFIX
441                                 "Invalid 0-length structure\n");
442                         ret = -EINVAL;
443                         break;
444                 }
445
446                 dmar_table_print_dmar_entry(entry_header);
447
448                 switch (entry_header->type) {
449                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
450                         ret = dmar_parse_one_drhd(entry_header);
451                         break;
452                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
453 #ifdef CONFIG_DMAR
454                         ret = dmar_parse_one_rmrr(entry_header);
455 #endif
456                         break;
457                 case ACPI_DMAR_TYPE_ATSR:
458 #ifdef CONFIG_DMAR
459                         ret = dmar_parse_one_atsr(entry_header);
460 #endif
461                         break;
462                 default:
463                         printk(KERN_WARNING PREFIX
464                                 "Unknown DMAR structure type %d\n",
465                                 entry_header->type);
466                         ret = 0; /* for forward compatibility */
467                         break;
468                 }
469                 if (ret)
470                         break;
471
472                 entry_header = ((void *)entry_header + entry_header->length);
473         }
474         return ret;
475 }
476
477 int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
478                           struct pci_dev *dev)
479 {
480         int index;
481
482         while (dev) {
483                 for (index = 0; index < cnt; index++)
484                         if (dev == devices[index])
485                                 return 1;
486
487                 /* Check our parent */
488                 dev = dev->bus->self;
489         }
490
491         return 0;
492 }
493
494 struct dmar_drhd_unit *
495 dmar_find_matched_drhd_unit(struct pci_dev *dev)
496 {
497         struct dmar_drhd_unit *dmaru = NULL;
498         struct acpi_dmar_hardware_unit *drhd;
499
500         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
501                 drhd = container_of(dmaru->hdr,
502                                     struct acpi_dmar_hardware_unit,
503                                     header);
504
505                 if (dmaru->include_all &&
506                     drhd->segment == pci_domain_nr(dev->bus))
507                         return dmaru;
508
509                 if (dmar_pci_device_match(dmaru->devices,
510                                           dmaru->devices_cnt, dev))
511                         return dmaru;
512         }
513
514         return NULL;
515 }
516
517 int __init dmar_dev_scope_init(void)
518 {
519         struct dmar_drhd_unit *drhd, *drhd_n;
520         int ret = -ENODEV;
521
522         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
523                 ret = dmar_parse_dev(drhd);
524                 if (ret)
525                         return ret;
526         }
527
528 #ifdef CONFIG_DMAR
529         {
530                 struct dmar_rmrr_unit *rmrr, *rmrr_n;
531                 struct dmar_atsr_unit *atsr, *atsr_n;
532
533                 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
534                         ret = rmrr_parse_dev(rmrr);
535                         if (ret)
536                                 return ret;
537                 }
538
539                 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
540                         ret = atsr_parse_dev(atsr);
541                         if (ret)
542                                 return ret;
543                 }
544         }
545 #endif
546
547         return ret;
548 }
549
550
551 int __init dmar_table_init(void)
552 {
553         static int dmar_table_initialized;
554         int ret;
555
556         if (dmar_table_initialized)
557                 return 0;
558
559         dmar_table_initialized = 1;
560
561         ret = parse_dmar_table();
562         if (ret) {
563                 if (ret != -ENODEV)
564                         printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
565                 return ret;
566         }
567
568         if (list_empty(&dmar_drhd_units)) {
569                 printk(KERN_INFO PREFIX "No DMAR devices found\n");
570                 return -ENODEV;
571         }
572
573 #ifdef CONFIG_DMAR
574         if (list_empty(&dmar_rmrr_units))
575                 printk(KERN_INFO PREFIX "No RMRR found\n");
576
577         if (list_empty(&dmar_atsr_units))
578                 printk(KERN_INFO PREFIX "No ATSR found\n");
579 #endif
580
581         return 0;
582 }
583
584 void __init detect_intel_iommu(void)
585 {
586         int ret;
587
588         ret = dmar_table_detect();
589
590         {
591 #ifdef CONFIG_INTR_REMAP
592                 struct acpi_table_dmar *dmar;
593                 /*
594                  * for now we will disable dma-remapping when interrupt
595                  * remapping is enabled.
596                  * When support for queued invalidation for IOTLB invalidation
597                  * is added, we will not need this any more.
598                  */
599                 dmar = (struct acpi_table_dmar *) dmar_tbl;
600                 if (ret && cpu_has_x2apic && dmar->flags & 0x1)
601                         printk(KERN_INFO
602                                "Queued invalidation will be enabled to support "
603                                "x2apic and Intr-remapping.\n");
604 #endif
605 #ifdef CONFIG_DMAR
606                 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
607                     !dmar_disabled)
608                         iommu_detected = 1;
609 #endif
610         }
611         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
612         dmar_tbl = NULL;
613 }
614
615
616 int alloc_iommu(struct dmar_drhd_unit *drhd)
617 {
618         struct intel_iommu *iommu;
619         int map_size;
620         u32 ver;
621         static int iommu_allocated = 0;
622         int agaw = 0;
623         int msagaw = 0;
624
625         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
626         if (!iommu)
627                 return -ENOMEM;
628
629         iommu->seq_id = iommu_allocated++;
630         sprintf (iommu->name, "dmar%d", iommu->seq_id);
631
632         iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
633         if (!iommu->reg) {
634                 printk(KERN_ERR "IOMMU: can't map the region\n");
635                 goto error;
636         }
637         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
638         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
639
640         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
641                 /* Promote an attitude of violence to a BIOS engineer today */
642                 WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n"
643                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
644                      drhd->reg_base_addr,
645                      dmi_get_system_info(DMI_BIOS_VENDOR),
646                      dmi_get_system_info(DMI_BIOS_VERSION),
647                      dmi_get_system_info(DMI_PRODUCT_VERSION));
648                 goto err_unmap;
649         }
650
651 #ifdef CONFIG_DMAR
652         agaw = iommu_calculate_agaw(iommu);
653         if (agaw < 0) {
654                 printk(KERN_ERR
655                        "Cannot get a valid agaw for iommu (seq_id = %d)\n",
656                        iommu->seq_id);
657                 goto err_unmap;
658         }
659         msagaw = iommu_calculate_max_sagaw(iommu);
660         if (msagaw < 0) {
661                 printk(KERN_ERR
662                         "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
663                         iommu->seq_id);
664                 goto err_unmap;
665         }
666 #endif
667         iommu->agaw = agaw;
668         iommu->msagaw = msagaw;
669
670         /* the registers might be more than one page */
671         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
672                 cap_max_fault_reg_offset(iommu->cap));
673         map_size = VTD_PAGE_ALIGN(map_size);
674         if (map_size > VTD_PAGE_SIZE) {
675                 iounmap(iommu->reg);
676                 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
677                 if (!iommu->reg) {
678                         printk(KERN_ERR "IOMMU: can't map the region\n");
679                         goto error;
680                 }
681         }
682
683         ver = readl(iommu->reg + DMAR_VER_REG);
684         pr_info("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
685                 (unsigned long long)drhd->reg_base_addr,
686                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
687                 (unsigned long long)iommu->cap,
688                 (unsigned long long)iommu->ecap);
689
690         spin_lock_init(&iommu->register_lock);
691
692         drhd->iommu = iommu;
693         return 0;
694
695  err_unmap:
696         iounmap(iommu->reg);
697  error:
698         kfree(iommu);
699         return -1;
700 }
701
702 void free_iommu(struct intel_iommu *iommu)
703 {
704         if (!iommu)
705                 return;
706
707 #ifdef CONFIG_DMAR
708         free_dmar_iommu(iommu);
709 #endif
710
711         if (iommu->reg)
712                 iounmap(iommu->reg);
713         kfree(iommu);
714 }
715
716 /*
717  * Reclaim all the submitted descriptors which have completed their work.
718  */
719 static inline void reclaim_free_desc(struct q_inval *qi)
720 {
721         while (qi->desc_status[qi->free_tail] == QI_DONE ||
722                qi->desc_status[qi->free_tail] == QI_ABORT) {
723                 qi->desc_status[qi->free_tail] = QI_FREE;
724                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
725                 qi->free_cnt++;
726         }
727 }
728
729 static int qi_check_fault(struct intel_iommu *iommu, int index)
730 {
731         u32 fault;
732         int head, tail;
733         struct q_inval *qi = iommu->qi;
734         int wait_index = (index + 1) % QI_LENGTH;
735
736         if (qi->desc_status[wait_index] == QI_ABORT)
737                 return -EAGAIN;
738
739         fault = readl(iommu->reg + DMAR_FSTS_REG);
740
741         /*
742          * If IQE happens, the head points to the descriptor associated
743          * with the error. No new descriptors are fetched until the IQE
744          * is cleared.
745          */
746         if (fault & DMA_FSTS_IQE) {
747                 head = readl(iommu->reg + DMAR_IQH_REG);
748                 if ((head >> DMAR_IQ_SHIFT) == index) {
749                         printk(KERN_ERR "VT-d detected invalid descriptor: "
750                                 "low=%llx, high=%llx\n",
751                                 (unsigned long long)qi->desc[index].low,
752                                 (unsigned long long)qi->desc[index].high);
753                         memcpy(&qi->desc[index], &qi->desc[wait_index],
754                                         sizeof(struct qi_desc));
755                         __iommu_flush_cache(iommu, &qi->desc[index],
756                                         sizeof(struct qi_desc));
757                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
758                         return -EINVAL;
759                 }
760         }
761
762         /*
763          * If ITE happens, all pending wait_desc commands are aborted.
764          * No new descriptors are fetched until the ITE is cleared.
765          */
766         if (fault & DMA_FSTS_ITE) {
767                 head = readl(iommu->reg + DMAR_IQH_REG);
768                 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
769                 head |= 1;
770                 tail = readl(iommu->reg + DMAR_IQT_REG);
771                 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
772
773                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
774
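                /*
                 * Walk backwards from the head wait-descriptor slot to the
                 * tail, marking every in-flight wait descriptor as aborted
                 * so its submitter knows to restart the request.
                 */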
775                 do {
776                         if (qi->desc_status[head] == QI_IN_USE)
777                                 qi->desc_status[head] = QI_ABORT;
778                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
779                 } while (head != tail);
780
781                 if (qi->desc_status[wait_index] == QI_ABORT)
782                         return -EAGAIN;
783         }
784
785         if (fault & DMA_FSTS_ICE)
786                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
787
788         return 0;
789 }
790
791 /*
792  * Submit the queued invalidation descriptor to the remapping
793  * hardware unit and wait for its completion.
794  */
795 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
796 {
797         int rc;
798         struct q_inval *qi = iommu->qi;
799         struct qi_desc *hw, wait_desc;
800         int wait_index, index;
801         unsigned long flags;
802
803         if (!qi)
804                 return 0;
805
806         hw = qi->desc;
807
808 restart:
809         rc = 0;
810
811         spin_lock_irqsave(&qi->q_lock, flags);
812         while (qi->free_cnt < 3) {
813                 spin_unlock_irqrestore(&qi->q_lock, flags);
814                 cpu_relax();
815                 spin_lock_irqsave(&qi->q_lock, flags);
816         }
817
818         index = qi->free_head;
819         wait_index = (index + 1) % QI_LENGTH;
820
821         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
822
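        /*
         * Each submission consumes two queue slots: the caller's descriptor
         * followed by a wait descriptor whose status write lets us poll for
         * completion below.
         */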
823         hw[index] = *desc;
824
825         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
826                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
827         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
828
829         hw[wait_index] = wait_desc;
830
831         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
832         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
833
834         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
835         qi->free_cnt -= 2;
836
837         /*
838          * update the HW tail register indicating the presence of
839          * new descriptors.
840          */
841         writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
842
843         while (qi->desc_status[wait_index] != QI_DONE) {
844                 /*
845                  * We will leave the interrupts disabled, to prevent interrupt
846                  * context from queueing another cmd while a cmd is already submitted
847                  * and waiting for completion on this cpu. This is to avoid
848                  * a deadlock where the interrupt context can wait indefinitely
849                  * for free slots in the queue.
850                  */
851                 rc = qi_check_fault(iommu, index);
852                 if (rc)
853                         break;
854
855                 spin_unlock(&qi->q_lock);
856                 cpu_relax();
857                 spin_lock(&qi->q_lock);
858         }
859
860         qi->desc_status[index] = QI_DONE;
861
862         reclaim_free_desc(qi);
863         spin_unlock_irqrestore(&qi->q_lock, flags);
864
865         if (rc == -EAGAIN)
866                 goto restart;
867
868         return rc;
869 }
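/*
 * Typical usage (see qi_global_iec() and the qi_flush_* helpers below):
 * build a struct qi_desc on the stack, fill in desc.low/desc.high for the
 * invalidation required, and pass it to qi_submit_sync(), which appends
 * the wait descriptor and spins until hardware reports completion.
 */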
870
871 /*
872  * Flush the global interrupt entry cache.
873  */
874 void qi_global_iec(struct intel_iommu *iommu)
875 {
876         struct qi_desc desc;
877
878         desc.low = QI_IEC_TYPE;
879         desc.high = 0;
880
881         /* should never fail */
882         qi_submit_sync(&desc, iommu);
883 }
884
885 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
886                       u64 type)
887 {
888         struct qi_desc desc;
889
890         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
891                         | QI_CC_GRAN(type) | QI_CC_TYPE;
892         desc.high = 0;
893
894         qi_submit_sync(&desc, iommu);
895 }
896
897 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
898                     unsigned int size_order, u64 type)
899 {
900         u8 dw = 0, dr = 0;
901
902         struct qi_desc desc;
903         int ih = 0;
904
905         if (cap_write_drain(iommu->cap))
906                 dw = 1;
907
908         if (cap_read_drain(iommu->cap))
909                 dr = 1;
910
911         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
912                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
913         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
914                 | QI_IOTLB_AM(size_order);
915
916         qi_submit_sync(&desc, iommu);
917 }
918
919 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
920                         u64 addr, unsigned mask)
921 {
922         struct qi_desc desc;
923
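        /*
         * For a size-qualified invalidation, fill the low-order address bits
         * with 1s so that the least significant zero bit encodes a 2^mask
         * page range; QI_DEV_IOTLB_SIZE sets the S (size) bit.
         */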
924         if (mask) {
925                 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
926                 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
927                 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
928         } else
929                 desc.high = QI_DEV_IOTLB_ADDR(addr);
930
931         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
932                 qdep = 0;
933
934         desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
935                    QI_DIOTLB_TYPE;
936
937         qi_submit_sync(&desc, iommu);
938 }
939
940 /*
941  * Disable Queued Invalidation interface.
942  */
943 void dmar_disable_qi(struct intel_iommu *iommu)
944 {
945         unsigned long flags;
946         u32 sts;
947         cycles_t start_time = get_cycles();
948
949         if (!ecap_qis(iommu->ecap))
950                 return;
951
952         spin_lock_irqsave(&iommu->register_lock, flags);
953
954         sts =  dmar_readq(iommu->reg + DMAR_GSTS_REG);
955         if (!(sts & DMA_GSTS_QIES))
956                 goto end;
957
958         /*
959          * Give the HW a chance to complete the pending invalidation requests.
960          */
961         while ((readl(iommu->reg + DMAR_IQT_REG) !=
962                 readl(iommu->reg + DMAR_IQH_REG)) &&
963                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
964                 cpu_relax();
965
966         iommu->gcmd &= ~DMA_GCMD_QIE;
967         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
968
969         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
970                       !(sts & DMA_GSTS_QIES), sts);
971 end:
972         spin_unlock_irqrestore(&iommu->register_lock, flags);
973 }
974
975 /*
976  * Enable queued invalidation.
977  */
978 static void __dmar_enable_qi(struct intel_iommu *iommu)
979 {
980         u32 sts;
981         unsigned long flags;
982         struct q_inval *qi = iommu->qi;
983
984         qi->free_head = qi->free_tail = 0;
985         qi->free_cnt = QI_LENGTH;
986
987         spin_lock_irqsave(&iommu->register_lock, flags);
988
989         /* write zero to the tail reg */
990         writel(0, iommu->reg + DMAR_IQT_REG);
991
992         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
993
994         iommu->gcmd |= DMA_GCMD_QIE;
995         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
996
997         /* Make sure hardware completes it */
998         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
999
1000         spin_unlock_irqrestore(&iommu->register_lock, flags);
1001 }
1002
1003 /*
1004  * Enable Queued Invalidation interface. This is a must to support
1005  * interrupt-remapping. Also used by DMA-remapping, where it replaces
1006  * register-based IOTLB invalidation.
1007  */
1008 int dmar_enable_qi(struct intel_iommu *iommu)
1009 {
1010         struct q_inval *qi;
1011
1012         if (!ecap_qis(iommu->ecap))
1013                 return -ENOENT;
1014
1015         /*
1016          * queued invalidation is already set up and enabled.
1017          */
1018         if (iommu->qi)
1019                 return 0;
1020
1021         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1022         if (!iommu->qi)
1023                 return -ENOMEM;
1024
1025         qi = iommu->qi;
1026
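        /*
         * The invalidation queue is a single zeroed page holding QI_LENGTH
         * descriptors; desc_status[] tracks the software-side state of each
         * slot (free, in use, done, aborted).
         */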
1027         qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
1028         if (!qi->desc) {
1029                 kfree(qi);
1030                 iommu->qi = 0;
1031                 return -ENOMEM;
1032         }
1033
1034         qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1035         if (!qi->desc_status) {
1036                 free_page((unsigned long) qi->desc);
1037                 kfree(qi);
1038                 iommu->qi = 0;
1039                 return -ENOMEM;
1040         }
1041
1042         qi->free_head = qi->free_tail = 0;
1043         qi->free_cnt = QI_LENGTH;
1044
1045         spin_lock_init(&qi->q_lock);
1046
1047         __dmar_enable_qi(iommu);
1048
1049         return 0;
1050 }
1051
1052 /* iommu interrupt handling. Most of it is MSI-like. */
1053
1054 enum faulttype {
1055         DMA_REMAP,
1056         INTR_REMAP,
1057         UNKNOWN,
1058 };
1059
1060 static const char *dma_remap_fault_reasons[] =
1061 {
1062         "Software",
1063         "Present bit in root entry is clear",
1064         "Present bit in context entry is clear",
1065         "Invalid context entry",
1066         "Access beyond MGAW",
1067         "PTE Write access is not set",
1068         "PTE Read access is not set",
1069         "Next page table ptr is invalid",
1070         "Root table address invalid",
1071         "Context table ptr is invalid",
1072         "non-zero reserved fields in RTP",
1073         "non-zero reserved fields in CTP",
1074         "non-zero reserved fields in PTE",
1075 };
1076
1077 static const char *intr_remap_fault_reasons[] =
1078 {
1079         "Detected reserved fields in the decoded interrupt-remapped request",
1080         "Interrupt index exceeded the interrupt-remapping table size",
1081         "Present field in the IRTE entry is clear",
1082         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1083         "Detected reserved fields in the IRTE entry",
1084         "Blocked a compatibility format interrupt request",
1085         "Blocked an interrupt request due to source-id verification failure",
1086 };
1087
1088 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(fault_reason_strings) - 1)
1089
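/*
 * Per the VT-d specification, interrupt-remapping fault reasons start at
 * 0x20; smaller reason codes index dma_remap_fault_reasons[] directly.
 */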
1090 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1091 {
1092         if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1093                                      ARRAY_SIZE(intr_remap_fault_reasons))) {
1094                 *fault_type = INTR_REMAP;
1095                 return intr_remap_fault_reasons[fault_reason - 0x20];
1096         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1097                 *fault_type = DMA_REMAP;
1098                 return dma_remap_fault_reasons[fault_reason];
1099         } else {
1100                 *fault_type = UNKNOWN;
1101                 return "Unknown";
1102         }
1103 }
1104
1105 void dmar_msi_unmask(unsigned int irq)
1106 {
1107         struct intel_iommu *iommu = get_irq_data(irq);
1108         unsigned long flag;
1109
1110         /* unmask it */
1111         spin_lock_irqsave(&iommu->register_lock, flag);
1112         writel(0, iommu->reg + DMAR_FECTL_REG);
1113         /* Read a reg to force flush the posted write */
1114         readl(iommu->reg + DMAR_FECTL_REG);
1115         spin_unlock_irqrestore(&iommu->register_lock, flag);
1116 }
1117
1118 void dmar_msi_mask(unsigned int irq)
1119 {
1120         unsigned long flag;
1121         struct intel_iommu *iommu = get_irq_data(irq);
1122
1123         /* mask it */
1124         spin_lock_irqsave(&iommu->register_lock, flag);
1125         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1126         /* Read a reg to force flush the posted write */
1127         readl(iommu->reg + DMAR_FECTL_REG);
1128         spin_unlock_irqrestore(&iommu->register_lock, flag);
1129 }
1130
1131 void dmar_msi_write(int irq, struct msi_msg *msg)
1132 {
1133         struct intel_iommu *iommu = get_irq_data(irq);
1134         unsigned long flag;
1135
1136         spin_lock_irqsave(&iommu->register_lock, flag);
1137         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1138         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1139         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1140         spin_unlock_irqrestore(&iommu->register_lock, flag);
1141 }
1142
1143 void dmar_msi_read(int irq, struct msi_msg *msg)
1144 {
1145         struct intel_iommu *iommu = get_irq_data(irq);
1146         unsigned long flag;
1147
1148         spin_lock_irqsave(&iommu->register_lock, flag);
1149         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1150         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1151         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1152         spin_unlock_irqrestore(&iommu->register_lock, flag);
1153 }
1154
1155 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1156                 u8 fault_reason, u16 source_id, unsigned long long addr)
1157 {
1158         const char *reason;
1159         int fault_type;
1160
1161         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1162
1163         if (fault_type == INTR_REMAP)
1164                 printk(KERN_ERR "INTR-REMAP: Request device [%02x:%02x.%d] "
1165                        "fault index %llx\n"
1166                         "INTR-REMAP:[fault reason %02d] %s\n",
1167                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1168                         PCI_FUNC(source_id & 0xFF), addr >> 48,
1169                         fault_reason, reason);
1170         else
1171                 printk(KERN_ERR
1172                        "DMAR:[%s] Request device [%02x:%02x.%d] "
1173                        "fault addr %llx\n"
1174                        "DMAR:[fault reason %02d] %s\n",
1175                        (type ? "DMA Read" : "DMA Write"),
1176                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1177                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1178         return 0;
1179 }
1180
1181 #define PRIMARY_FAULT_REG_LEN (16)
1182 irqreturn_t dmar_fault(int irq, void *dev_id)
1183 {
1184         struct intel_iommu *iommu = dev_id;
1185         int reg, fault_index;
1186         u32 fault_status;
1187         unsigned long flag;
1188
1189         spin_lock_irqsave(&iommu->register_lock, flag);
1190         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1191         if (fault_status)
1192                 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1193                        fault_status);
1194
1195         /* TBD: ignore advanced fault log currently */
1196         if (!(fault_status & DMA_FSTS_PPF))
1197                 goto clear_rest;
1198
1199         fault_index = dma_fsts_fault_record_index(fault_status);
1200         reg = cap_fault_reg_offset(iommu->cap);
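        /*
         * Each fault recording register is 128 bits (PRIMARY_FAULT_REG_LEN
         * bytes); process records until one is found with the F bit clear,
         * wrapping around the register array as needed.
         */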
1201         while (1) {
1202                 u8 fault_reason;
1203                 u16 source_id;
1204                 u64 guest_addr;
1205                 int type;
1206                 u32 data;
1207
1208                 /* highest 32 bits */
1209                 data = readl(iommu->reg + reg +
1210                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1211                 if (!(data & DMA_FRCD_F))
1212                         break;
1213
1214                 fault_reason = dma_frcd_fault_reason(data);
1215                 type = dma_frcd_type(data);
1216
1217                 data = readl(iommu->reg + reg +
1218                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1219                 source_id = dma_frcd_source_id(data);
1220
1221                 guest_addr = dmar_readq(iommu->reg + reg +
1222                                 fault_index * PRIMARY_FAULT_REG_LEN);
1223                 guest_addr = dma_frcd_page_addr(guest_addr);
1224                 /* clear the fault */
1225                 writel(DMA_FRCD_F, iommu->reg + reg +
1226                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1227
1228                 spin_unlock_irqrestore(&iommu->register_lock, flag);
1229
1230                 dmar_fault_do_one(iommu, type, fault_reason,
1231                                 source_id, guest_addr);
1232
1233                 fault_index++;
1234                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1235                         fault_index = 0;
1236                 spin_lock_irqsave(&iommu->register_lock, flag);
1237         }
1238 clear_rest:
1239         /* clear all the other faults */
1240         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1241         writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1242
1243         spin_unlock_irqrestore(&iommu->register_lock, flag);
1244         return IRQ_HANDLED;
1245 }
1246
1247 int dmar_set_interrupt(struct intel_iommu *iommu)
1248 {
1249         int irq, ret;
1250
1251         /*
1252          * Check if the fault interrupt is already initialized.
1253          */
1254         if (iommu->irq)
1255                 return 0;
1256
1257         irq = create_irq();
1258         if (!irq) {
1259                 printk(KERN_ERR "IOMMU: no free vectors\n");
1260                 return -EINVAL;
1261         }
1262
1263         set_irq_data(irq, iommu);
1264         iommu->irq = irq;
1265
1266         ret = arch_setup_dmar_msi(irq);
1267         if (ret) {
1268                 set_irq_data(irq, NULL);
1269                 iommu->irq = 0;
1270                 destroy_irq(irq);
1271                 return ret;
1272         }
1273
1274         ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
1275         if (ret)
1276                 printk(KERN_ERR "IOMMU: can't request irq\n");
1277         return ret;
1278 }
1279
1280 int __init enable_drhd_fault_handling(void)
1281 {
1282         struct dmar_drhd_unit *drhd;
1283
1284         /*
1285          * Enable fault control interrupt.
1286          */
1287         for_each_drhd_unit(drhd) {
1288                 int ret;
1289                 struct intel_iommu *iommu = drhd->iommu;
1290                 ret = dmar_set_interrupt(iommu);
1291
1292                 if (ret) {
1293                         printk(KERN_ERR "DRHD %Lx: failed to enable fault "
1294                                "interrupt, ret %d\n",
1295                                (unsigned long long)drhd->reg_base_addr, ret);
1296                         return -1;
1297                 }
1298         }
1299
1300         return 0;
1301 }
1302
1303 /*
1304  * Re-enable Queued Invalidation interface.
1305  */
1306 int dmar_reenable_qi(struct intel_iommu *iommu)
1307 {
1308         if (!ecap_qis(iommu->ecap))
1309                 return -ENOENT;
1310
1311         if (!iommu->qi)
1312                 return -ENOENT;
1313
1314         /*
1315          * First disable queued invalidation.
1316          */
1317         dmar_disable_qi(iommu);
1318         /*
1319          * Then enable queued invalidation again. Since there is no pending
1320          * invalidation requests now, it's safe to re-enable queued
1321          * invalidation.
1322          */
1323         __dmar_enable_qi(iommu);
1324
1325         return 0;
1326 }
1327
1328 /*
1329  * Check interrupt remapping support in the DMAR table description.
1330  */
1331 int dmar_ir_support(void)
1332 {
1333         struct acpi_table_dmar *dmar;
1334         dmar = (struct acpi_table_dmar *)dmar_tbl;
1335         return dmar->flags & 0x1;
1336 }