/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *
 * This file implements early detection/parsing of Remapping Devices
 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
 * tables.
 *
 * These routines are used by both DMA-remapping and Interrupt-remapping
 */

#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/iova.h>
#include <linux/intel-iommu.h>
#include <linux/timer.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/dmi.h>	/* dmi_get_system_info(), used in the WARN()s below */

#undef PREFIX
#define PREFIX "DMAR:"
/*
 * No locks are needed, as the DMA remapping hardware unit
 * list is constructed at boot time and hotplug of these
 * units is not supported by the architecture.
 */
LIST_HEAD(dmar_drhd_units);

static struct acpi_table_header * __initdata dmar_tbl;
static acpi_size dmar_tbl_size;

static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
{
        /*
         * Add the INCLUDE_ALL unit at the tail, so that a scan of the
         * list will find it at the very end.
         */
        if (drhd->include_all)
                list_add_tail(&drhd->list, &dmar_drhd_units);
        else
                list_add(&drhd->list, &dmar_drhd_units);
}

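/*
 * A device-scope entry in the DMAR table is variable length: an
 * acpi_dmar_device_scope header immediately followed by an array of
 * acpi_dmar_pci_path structures, one per bridge hop, roughly:
 *
 *	[ acpi_dmar_device_scope ][ pci_path 0 ][ pci_path 1 ] ...
 *
 * That is why the path count below falls out of the two lengths, and
 * why the walk descends through pdev->subordinate one hop at a time.
 */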
static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
                                           struct pci_dev **dev, u16 segment)
{
        struct pci_bus *bus;
        struct pci_dev *pdev = NULL;
        struct acpi_dmar_pci_path *path;
        int count;

        bus = pci_find_bus(segment, scope->bus);
        path = (struct acpi_dmar_pci_path *)(scope + 1);
        count = (scope->length - sizeof(struct acpi_dmar_device_scope))
                / sizeof(struct acpi_dmar_pci_path);

        while (count) {
                if (pdev)
                        pci_dev_put(pdev);
                /*
                 * Some BIOSes list non-existent devices in the DMAR
                 * table; just ignore them.
                 */
                if (!bus) {
                        printk(KERN_WARNING
                        PREFIX "Device scope bus [%d] not found\n",
                        scope->bus);
                        break;
                }
                pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
                if (!pdev) {
                        printk(KERN_WARNING PREFIX
                        "Device scope device [%04x:%02x:%02x.%02x] not found\n",
                                segment, bus->number, path->dev, path->fn);
                        break;
                }
                path++;
                count--;
                bus = pdev->subordinate;
        }
        if (!pdev) {
                printk(KERN_WARNING PREFIX
                "Device scope device [%04x:%02x:%02x.%02x] not found\n",
                segment, scope->bus, path->dev, path->fn);
                *dev = NULL;
                return 0;
        }
        if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
             pdev->subordinate) ||
            (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
             !pdev->subordinate)) {
                /* Print the name before dropping our reference to the device */
                printk(KERN_WARNING PREFIX
                        "Device scope type does not match for %s\n",
                        pci_name(pdev));
                pci_dev_put(pdev);
                return -EINVAL;
        }
        *dev = pdev;
        return 0;
}

static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
                                       struct pci_dev ***devices, u16 segment)
{
        struct acpi_dmar_device_scope *scope;
        void *tmp = start;
        int index;
        int ret;

        *cnt = 0;
        while (start < end) {
                scope = start;
                if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
                    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
                        (*cnt)++;
                else
                        printk(KERN_WARNING PREFIX
                                "Unsupported device scope\n");
                start += scope->length;
        }
        if (*cnt == 0)
                return 0;

        *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
        if (!*devices)
                return -ENOMEM;

        start = tmp;
        index = 0;
        while (start < end) {
                scope = start;
                if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
                    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
                        ret = dmar_parse_one_dev_scope(scope,
                                &(*devices)[index], segment);
                        if (ret) {
                                kfree(*devices);
                                return ret;
                        }
                        index++;
                }
                start += scope->length;
        }

        return 0;
}

/**
 * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
 * structure, which uniquely represents one DMA remapping hardware unit
 * present in the platform
 */
static int __init
dmar_parse_one_drhd(struct acpi_dmar_header *header)
{
        struct acpi_dmar_hardware_unit *drhd;
        struct dmar_drhd_unit *dmaru;
        int ret = 0;

        drhd = (struct acpi_dmar_hardware_unit *)header;
        if (!drhd->address) {
                /* Promote an attitude of violence to a BIOS engineer today */
                WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n"
                     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
                     dmi_get_system_info(DMI_BIOS_VENDOR),
                     dmi_get_system_info(DMI_BIOS_VERSION),
                     dmi_get_system_info(DMI_PRODUCT_VERSION));
                return -ENODEV;
        }
        dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
        if (!dmaru)
                return -ENOMEM;

        dmaru->hdr = header;
        dmaru->reg_base_addr = drhd->address;
        dmaru->segment = drhd->segment;
        dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */

        ret = alloc_iommu(dmaru);
        if (ret) {
                kfree(dmaru);
                return ret;
        }
        dmar_register_drhd_unit(dmaru);
        return 0;
}

static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
{
        struct acpi_dmar_hardware_unit *drhd;
        int ret = 0;

        drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;

        if (dmaru->include_all)
                return 0;

        ret = dmar_parse_dev_scope((void *)(drhd + 1),
                                ((void *)drhd) + drhd->header.length,
                                &dmaru->devices_cnt, &dmaru->devices,
                                drhd->segment);
        if (ret) {
                list_del(&dmaru->list);
                kfree(dmaru);
        }
        return ret;
}

#ifdef CONFIG_DMAR
LIST_HEAD(dmar_rmrr_units);

static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
{
        list_add(&rmrr->list, &dmar_rmrr_units);
}


static int __init
dmar_parse_one_rmrr(struct acpi_dmar_header *header)
{
        struct acpi_dmar_reserved_memory *rmrr;
        struct dmar_rmrr_unit *rmrru;

        rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
        if (!rmrru)
                return -ENOMEM;

        rmrru->hdr = header;
        rmrr = (struct acpi_dmar_reserved_memory *)header;
        rmrru->base_address = rmrr->base_address;
        rmrru->end_address = rmrr->end_address;

        dmar_register_rmrr_unit(rmrru);
        return 0;
}

static int __init
rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
{
        struct acpi_dmar_reserved_memory *rmrr;
        int ret;

        rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
        ret = dmar_parse_dev_scope((void *)(rmrr + 1),
                ((void *)rmrr) + rmrr->header.length,
                &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);

        if (ret || (rmrru->devices_cnt == 0)) {
                list_del(&rmrru->list);
                kfree(rmrru);
        }
        return ret;
}

static LIST_HEAD(dmar_atsr_units);

static int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
{
        struct acpi_dmar_atsr *atsr;
        struct dmar_atsr_unit *atsru;

        atsr = container_of(hdr, struct acpi_dmar_atsr, header);
        atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
        if (!atsru)
                return -ENOMEM;

        atsru->hdr = hdr;
        atsru->include_all = atsr->flags & 0x1;

        list_add(&atsru->list, &dmar_atsr_units);

        return 0;
}

static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
{
        int rc;
        struct acpi_dmar_atsr *atsr;

        if (atsru->include_all)
                return 0;

        atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
        rc = dmar_parse_dev_scope((void *)(atsr + 1),
                                (void *)atsr + atsr->header.length,
                                &atsru->devices_cnt, &atsru->devices,
                                atsr->segment);
        if (rc || !atsru->devices_cnt) {
                list_del(&atsru->list);
                kfree(atsru);
        }

        return rc;
}

int dmar_find_matched_atsr_unit(struct pci_dev *dev)
{
        int i;
        struct pci_bus *bus;
        struct acpi_dmar_atsr *atsr;
        struct dmar_atsr_unit *atsru;

        list_for_each_entry(atsru, &dmar_atsr_units, list) {
                atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
                if (atsr->segment == pci_domain_nr(dev->bus))
                        goto found;
        }

        return 0;

found:
        for (bus = dev->bus; bus; bus = bus->parent) {
                struct pci_dev *bridge = bus->self;

                if (!bridge || !bridge->is_pcie ||
                    bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
                        return 0;

                if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
                        for (i = 0; i < atsru->devices_cnt; i++)
                                if (atsru->devices[i] == bridge)
                                        return 1;
                        break;
                }
        }

        if (atsru->include_all)
                return 1;

        return 0;
}
#endif

static void __init
dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
{
        struct acpi_dmar_hardware_unit *drhd;
        struct acpi_dmar_reserved_memory *rmrr;
        struct acpi_dmar_atsr *atsr;

        switch (header->type) {
        case ACPI_DMAR_TYPE_HARDWARE_UNIT:
                drhd = container_of(header, struct acpi_dmar_hardware_unit,
                                    header);
                printk(KERN_INFO PREFIX
                        "DRHD base: %#016Lx flags: %#x\n",
                        (unsigned long long)drhd->address, drhd->flags);
                break;
        case ACPI_DMAR_TYPE_RESERVED_MEMORY:
                rmrr = container_of(header, struct acpi_dmar_reserved_memory,
                                    header);
                printk(KERN_INFO PREFIX
                        "RMRR base: %#016Lx end: %#016Lx\n",
                        (unsigned long long)rmrr->base_address,
                        (unsigned long long)rmrr->end_address);
                break;
        case ACPI_DMAR_TYPE_ATSR:
                atsr = container_of(header, struct acpi_dmar_atsr, header);
                printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
                break;
        }
}

/**
 * dmar_table_detect - checks to see if the platform supports DMAR devices
 */
static int __init dmar_table_detect(void)
{
        acpi_status status = AE_OK;

        /* If we can find the DMAR table, then there are DMAR devices. */
        status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
                                (struct acpi_table_header **)&dmar_tbl,
                                &dmar_tbl_size);

        if (ACPI_SUCCESS(status) && !dmar_tbl) {
                printk(KERN_WARNING PREFIX "Unable to map DMAR\n");
                status = AE_NOT_FOUND;
        }

        return (ACPI_SUCCESS(status) ? 1 : 0);
}

/**
 * parse_dmar_table - parses the DMA reporting table
 */
static int __init
parse_dmar_table(void)
{
        struct acpi_table_dmar *dmar;
        struct acpi_dmar_header *entry_header;
        int ret = 0;

        /*
         * Detect again: the earlier dmar_tbl mapping may have been done
         * with a fixed (early boot) map.
         */
        dmar_table_detect();

        dmar = (struct acpi_table_dmar *)dmar_tbl;
        if (!dmar)
                return -ENODEV;

        if (dmar->width < PAGE_SHIFT - 1) {
                printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
                return -EINVAL;
        }

        printk(KERN_INFO PREFIX "Host address width %d\n",
                dmar->width + 1);

        entry_header = (struct acpi_dmar_header *)(dmar + 1);
        while (((unsigned long)entry_header) <
                        (((unsigned long)dmar) + dmar_tbl->length)) {
                /* Avoid looping forever on bad ACPI tables */
                if (entry_header->length == 0) {
                        printk(KERN_WARNING PREFIX
                                "Invalid 0-length structure\n");
                        ret = -EINVAL;
                        break;
                }

                dmar_table_print_dmar_entry(entry_header);

                switch (entry_header->type) {
                case ACPI_DMAR_TYPE_HARDWARE_UNIT:
                        ret = dmar_parse_one_drhd(entry_header);
                        break;
                case ACPI_DMAR_TYPE_RESERVED_MEMORY:
#ifdef CONFIG_DMAR
                        ret = dmar_parse_one_rmrr(entry_header);
#endif
                        break;
                case ACPI_DMAR_TYPE_ATSR:
#ifdef CONFIG_DMAR
                        ret = dmar_parse_one_atsr(entry_header);
#endif
                        break;
                default:
                        printk(KERN_WARNING PREFIX
                                "Unknown DMAR structure type\n");
                        ret = 0; /* for forward compatibility */
                        break;
                }
                if (ret)
                        break;

                entry_header = ((void *)entry_header + entry_header->length);
        }
        return ret;
}

int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
                          struct pci_dev *dev)
{
        int index;

        while (dev) {
                for (index = 0; index < cnt; index++)
                        if (dev == devices[index])
                                return 1;

                /* Check our parent */
                dev = dev->bus->self;
        }

        return 0;
}

struct dmar_drhd_unit *
dmar_find_matched_drhd_unit(struct pci_dev *dev)
{
        struct dmar_drhd_unit *dmaru = NULL;
        struct acpi_dmar_hardware_unit *drhd;

        list_for_each_entry(dmaru, &dmar_drhd_units, list) {
                drhd = container_of(dmaru->hdr,
                                    struct acpi_dmar_hardware_unit,
                                    header);

                if (dmaru->include_all &&
                    drhd->segment == pci_domain_nr(dev->bus))
                        return dmaru;

                if (dmar_pci_device_match(dmaru->devices,
                                          dmaru->devices_cnt, dev))
                        return dmaru;
        }

        return NULL;
}

int __init dmar_dev_scope_init(void)
{
        struct dmar_drhd_unit *drhd, *drhd_n;
        int ret = -ENODEV;

        list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
                ret = dmar_parse_dev(drhd);
                if (ret)
                        return ret;
        }

#ifdef CONFIG_DMAR
        {
                struct dmar_rmrr_unit *rmrr, *rmrr_n;
                struct dmar_atsr_unit *atsr, *atsr_n;

                list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
                        ret = rmrr_parse_dev(rmrr);
                        if (ret)
                                return ret;
                }

                list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
                        ret = atsr_parse_dev(atsr);
                        if (ret)
                                return ret;
                }
        }
#endif

        return ret;
}


int __init dmar_table_init(void)
{
        static int dmar_table_initialized;
        int ret;

        if (dmar_table_initialized)
                return 0;

        dmar_table_initialized = 1;

        ret = parse_dmar_table();
        if (ret) {
                if (ret != -ENODEV)
                        printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
                return ret;
        }

        if (list_empty(&dmar_drhd_units)) {
                printk(KERN_INFO PREFIX "No DMAR devices found\n");
                return -ENODEV;
        }

#ifdef CONFIG_DMAR
        if (list_empty(&dmar_rmrr_units))
                printk(KERN_INFO PREFIX "No RMRR found\n");

        if (list_empty(&dmar_atsr_units))
                printk(KERN_INFO PREFIX "No ATSR found\n");
#endif

        return 0;
}

void __init detect_intel_iommu(void)
{
        int ret;

        ret = dmar_table_detect();

        {
#ifdef CONFIG_INTR_REMAP
                struct acpi_table_dmar *dmar;
                /*
                 * For now we will disable DMA remapping when interrupt
                 * remapping is enabled.
                 * When support for queued invalidation of the IOTLB is
                 * added, we will not need this any more.
                 */
                dmar = (struct acpi_table_dmar *) dmar_tbl;
                if (ret && cpu_has_x2apic && dmar->flags & 0x1)
                        printk(KERN_INFO
                               "Queued invalidation will be enabled to support "
                               "x2apic and Intr-remapping.\n");
#endif
#ifdef CONFIG_DMAR
                if (ret && !no_iommu && !iommu_detected && !swiotlb &&
                    !dmar_disabled)
                        iommu_detected = 1;
#endif
        }
        early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
        dmar_tbl = NULL;
}


int alloc_iommu(struct dmar_drhd_unit *drhd)
{
        struct intel_iommu *iommu;
        int map_size;
        u32 ver;
        static int iommu_allocated = 0;
        int agaw = 0;
        int msagaw = 0;

        iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
        if (!iommu)
                return -ENOMEM;

        iommu->seq_id = iommu_allocated++;
        sprintf(iommu->name, "dmar%d", iommu->seq_id);

        iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
        if (!iommu->reg) {
                printk(KERN_ERR "IOMMU: can't map the region\n");
                goto error;
        }
        iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
        iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);

        if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
                /* Promote an attitude of violence to a BIOS engineer today */
                WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n"
                     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
                     drhd->reg_base_addr,
                     dmi_get_system_info(DMI_BIOS_VENDOR),
                     dmi_get_system_info(DMI_BIOS_VERSION),
                     dmi_get_system_info(DMI_PRODUCT_VERSION));
                goto err_unmap;
        }

#ifdef CONFIG_DMAR
        agaw = iommu_calculate_agaw(iommu);
        if (agaw < 0) {
                printk(KERN_ERR
                       "Cannot get a valid agaw for iommu (seq_id = %d)\n",
                       iommu->seq_id);
                goto err_unmap;
        }
        msagaw = iommu_calculate_max_sagaw(iommu);
        if (msagaw < 0) {
                printk(KERN_ERR
                        "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
                        iommu->seq_id);
                goto err_unmap;
        }
#endif
        iommu->agaw = agaw;
        iommu->msagaw = msagaw;

        /* the registers might be more than one page */
        map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
                cap_max_fault_reg_offset(iommu->cap));
        map_size = VTD_PAGE_ALIGN(map_size);
        if (map_size > VTD_PAGE_SIZE) {
                iounmap(iommu->reg);
                iommu->reg = ioremap(drhd->reg_base_addr, map_size);
                if (!iommu->reg) {
                        printk(KERN_ERR "IOMMU: can't map the region\n");
                        goto error;
                }
        }

        ver = readl(iommu->reg + DMAR_VER_REG);
        pr_info("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
                (unsigned long long)drhd->reg_base_addr,
                DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
                (unsigned long long)iommu->cap,
                (unsigned long long)iommu->ecap);

        spin_lock_init(&iommu->register_lock);

        drhd->iommu = iommu;
        return 0;

 err_unmap:
        iounmap(iommu->reg);
 error:
        kfree(iommu);
        return -1;
}

void free_iommu(struct intel_iommu *iommu)
{
        if (!iommu)
                return;

#ifdef CONFIG_DMAR
        free_dmar_iommu(iommu);
#endif

        if (iommu->reg)
                iounmap(iommu->reg);
        kfree(iommu);
}

/*
 * Reclaim all the submitted descriptors which have completed their work.
 */
static inline void reclaim_free_desc(struct q_inval *qi)
{
        while (qi->desc_status[qi->free_tail] == QI_DONE ||
               qi->desc_status[qi->free_tail] == QI_ABORT) {
                qi->desc_status[qi->free_tail] = QI_FREE;
                qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
                qi->free_cnt++;
        }
}

static int qi_check_fault(struct intel_iommu *iommu, int index)
{
        u32 fault;
        int head, tail;
        struct q_inval *qi = iommu->qi;
        int wait_index = (index + 1) % QI_LENGTH;

        if (qi->desc_status[wait_index] == QI_ABORT)
                return -EAGAIN;

        fault = readl(iommu->reg + DMAR_FSTS_REG);

        /*
         * If IQE happens, the head points to the descriptor associated
         * with the error. No new descriptors are fetched until the IQE
         * is cleared.
         */
        if (fault & DMA_FSTS_IQE) {
                head = readl(iommu->reg + DMAR_IQH_REG);
                if ((head >> DMAR_IQ_SHIFT) == index) {
                        printk(KERN_ERR "VT-d detected invalid descriptor: "
                                "low=%llx, high=%llx\n",
                                (unsigned long long)qi->desc[index].low,
                                (unsigned long long)qi->desc[index].high);
                        memcpy(&qi->desc[index], &qi->desc[wait_index],
                                        sizeof(struct qi_desc));
                        __iommu_flush_cache(iommu, &qi->desc[index],
                                        sizeof(struct qi_desc));
                        writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
                        return -EINVAL;
                }
        }

        /*
         * If ITE happens, all pending wait_desc commands are aborted.
         * No new descriptors are fetched until the ITE is cleared.
         */
        if (fault & DMA_FSTS_ITE) {
                head = readl(iommu->reg + DMAR_IQH_REG);
                head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
                head |= 1;
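                /*
                 * Descriptors are queued in (command, wait) pairs starting
                 * at even indices, so the wait descriptors live in the odd
                 * slots; forcing the low bit here and stepping back by two
                 * in the loop below visits exactly those wait slots.
                 */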
                tail = readl(iommu->reg + DMAR_IQT_REG);
                tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;

                writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);

                do {
                        if (qi->desc_status[head] == QI_IN_USE)
                                qi->desc_status[head] = QI_ABORT;
                        head = (head - 2 + QI_LENGTH) % QI_LENGTH;
                } while (head != tail);

                if (qi->desc_status[wait_index] == QI_ABORT)
                        return -EAGAIN;
        }

        if (fault & DMA_FSTS_ICE)
                writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);

        return 0;
}

/*
 * Submit the queued invalidation descriptor to the remapping
 * hardware unit and wait for its completion.
 */
int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
{
        int rc;
        struct q_inval *qi = iommu->qi;
        struct qi_desc *hw, wait_desc;
        int wait_index, index;
        unsigned long flags;

        if (!qi)
                return 0;

        hw = qi->desc;

restart:
        rc = 0;

        spin_lock_irqsave(&qi->q_lock, flags);
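        /*
         * Two slots are needed for the command and its wait descriptor;
         * waiting for a third spare slot appears intended to keep the
         * ring from ever filling up completely.
         */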
        while (qi->free_cnt < 3) {
                spin_unlock_irqrestore(&qi->q_lock, flags);
                cpu_relax();
                spin_lock_irqsave(&qi->q_lock, flags);
        }

        index = qi->free_head;
        wait_index = (index + 1) % QI_LENGTH;

        qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;

        hw[index] = *desc;

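        /*
         * Set up a wait descriptor right behind the real one.  With
         * QI_IWD_STATUS_WRITE set, the hardware writes QI_DONE into the
         * status word once it has executed everything up to and including
         * the wait descriptor; the poll loop below watches for that.
         */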
        wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
                        QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
        wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);

        hw[wait_index] = wait_desc;

        __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
        __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));

        qi->free_head = (qi->free_head + 2) % QI_LENGTH;
        qi->free_cnt -= 2;

        /*
         * Update the HW tail register indicating the presence of
         * new descriptors.
         */
        writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);

        while (qi->desc_status[wait_index] != QI_DONE) {
                /*
                 * We leave the interrupts disabled, to prevent the interrupt
                 * context from queueing another cmd while a cmd is already
                 * submitted and waiting for completion on this cpu. This
                 * avoids a deadlock where the interrupt context could wait
                 * indefinitely for free slots in the queue.
                 */
                rc = qi_check_fault(iommu, index);
                if (rc)
                        break;

                spin_unlock(&qi->q_lock);
                cpu_relax();
                spin_lock(&qi->q_lock);
        }

        qi->desc_status[index] = QI_DONE;

        reclaim_free_desc(qi);
        spin_unlock_irqrestore(&qi->q_lock, flags);

        if (rc == -EAGAIN)
                goto restart;

        return rc;
}

/*
 * Flush the global interrupt entry cache.
 */
void qi_global_iec(struct intel_iommu *iommu)
{
        struct qi_desc desc;

        desc.low = QI_IEC_TYPE;
        desc.high = 0;

        /* should never fail */
        qi_submit_sync(&desc, iommu);
}

void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
                      u64 type)
{
        struct qi_desc desc;

        desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
                        | QI_CC_GRAN(type) | QI_CC_TYPE;
        desc.high = 0;

        qi_submit_sync(&desc, iommu);
}

void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
                    unsigned int size_order, u64 type)
{
        u8 dw = 0, dr = 0;

        struct qi_desc desc;
        int ih = 0;

        if (cap_write_drain(iommu->cap))
                dw = 1;

        if (cap_read_drain(iommu->cap))
                dr = 1;

        desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
                | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
        desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
                | QI_IOTLB_AM(size_order);

        qi_submit_sync(&desc, iommu);
}

void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
                        u64 addr, unsigned mask)
{
        struct qi_desc desc;

        if (mask) {
                BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
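                /*
                 * The invalidation size is encoded in the address itself:
                 * set all bits below (VTD_PAGE_SHIFT + mask - 1) so that
                 * the lowest zero bit marks the size.  E.g. with 4KiB
                 * pages and mask = 1, bits 0-11 become 1 while bit 12
                 * stays 0, denoting a 2^1-page (8KiB) range.
                 */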
                addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
                desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
        } else {
                desc.high = QI_DEV_IOTLB_ADDR(addr);
        }

        if (qdep >= QI_DEV_IOTLB_MAX_INVS)
                qdep = 0;

        desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
                   QI_DIOTLB_TYPE;

        qi_submit_sync(&desc, iommu);
}

/*
 * Disable Queued Invalidation interface.
 */
void dmar_disable_qi(struct intel_iommu *iommu)
{
        unsigned long flags;
        u32 sts;
        cycles_t start_time = get_cycles();

        if (!ecap_qis(iommu->ecap))
                return;

        spin_lock_irqsave(&iommu->register_lock, flags);

        /* GSTS is a 32-bit register; use readl rather than a 64-bit read */
        sts = readl(iommu->reg + DMAR_GSTS_REG);
        if (!(sts & DMA_GSTS_QIES))
                goto end;

        /*
         * Give the hardware a chance to complete the pending invalidation
         * requests.
         */
        while ((readl(iommu->reg + DMAR_IQT_REG) !=
                readl(iommu->reg + DMAR_IQH_REG)) &&
                (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
                cpu_relax();

        iommu->gcmd &= ~DMA_GCMD_QIE;
        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
                      !(sts & DMA_GSTS_QIES), sts);
end:
        spin_unlock_irqrestore(&iommu->register_lock, flags);
}

/*
 * Enable queued invalidation.
 */
static void __dmar_enable_qi(struct intel_iommu *iommu)
{
        u32 sts;
        unsigned long flags;
        struct q_inval *qi = iommu->qi;

        qi->free_head = qi->free_tail = 0;
        qi->free_cnt = QI_LENGTH;

        spin_lock_irqsave(&iommu->register_lock, flags);

        /* write zero to the tail reg */
        writel(0, iommu->reg + DMAR_IQT_REG);

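        /* Point the hardware at the descriptor queue page (IQA register). */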
        dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));

        iommu->gcmd |= DMA_GCMD_QIE;
        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

        /* Make sure hardware completes it */
        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);

        spin_unlock_irqrestore(&iommu->register_lock, flags);
}

/*
 * Enable the Queued Invalidation interface. This is required to support
 * interrupt remapping. It is also used by DMA remapping, where it
 * replaces register-based IOTLB invalidation.
 */
int dmar_enable_qi(struct intel_iommu *iommu)
{
        struct q_inval *qi;

        if (!ecap_qis(iommu->ecap))
                return -ENOENT;

        /*
         * Queued invalidation is already set up and enabled.
         */
        if (iommu->qi)
                return 0;

        iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
        if (!iommu->qi)
                return -ENOMEM;

        qi = iommu->qi;

        qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
        if (!qi->desc) {
                kfree(qi);
                iommu->qi = NULL;
                return -ENOMEM;
        }

        qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
        if (!qi->desc_status) {
                free_page((unsigned long) qi->desc);
                kfree(qi);
                iommu->qi = NULL;
                return -ENOMEM;
        }

        qi->free_head = qi->free_tail = 0;
        qi->free_cnt = QI_LENGTH;

        spin_lock_init(&qi->q_lock);

        __dmar_enable_qi(iommu);

        return 0;
}

/* iommu interrupt handling. Most of it is MSI-like. */

enum faulttype {
        DMA_REMAP,
        INTR_REMAP,
        UNKNOWN,
};

static const char *dma_remap_fault_reasons[] =
{
        "Software",
        "Present bit in root entry is clear",
        "Present bit in context entry is clear",
        "Invalid context entry",
        "Access beyond MGAW",
        "PTE Write access is not set",
        "PTE Read access is not set",
        "Next page table ptr is invalid",
        "Root table address invalid",
        "Context table ptr is invalid",
        "non-zero reserved fields in RTP",
        "non-zero reserved fields in CTP",
        "non-zero reserved fields in PTE",
};

static const char *intr_remap_fault_reasons[] =
{
        "Detected reserved fields in the decoded interrupt-remapped request",
        "Interrupt index exceeded the interrupt-remapping table size",
        "Present field in the IRTE entry is clear",
        "Error accessing interrupt-remapping table pointed by IRTA_REG",
        "Detected reserved fields in the IRTE entry",
        "Blocked a compatibility format interrupt request",
        "Blocked an interrupt request due to source-id verification failure",
};

#define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(dma_remap_fault_reasons) - 1)

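/*
 * Per the VT-d spec, fault reason codes below 0x20 report DMA-remapping
 * faults, while codes from 0x20 upwards report interrupt-remapping
 * faults; hence the rebasing by 0x20 below.
 */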
const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
{
        if (fault_reason >= 0x20 && (fault_reason < 0x20 +
                                     ARRAY_SIZE(intr_remap_fault_reasons))) {
                *fault_type = INTR_REMAP;
                return intr_remap_fault_reasons[fault_reason - 0x20];
        } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
                *fault_type = DMA_REMAP;
                return dma_remap_fault_reasons[fault_reason];
        } else {
                *fault_type = UNKNOWN;
                return "Unknown";
        }
}

void dmar_msi_unmask(unsigned int irq)
{
        struct intel_iommu *iommu = get_irq_data(irq);
        unsigned long flag;

        /* unmask it */
        spin_lock_irqsave(&iommu->register_lock, flag);
        writel(0, iommu->reg + DMAR_FECTL_REG);
        /* Read a reg to force flush the post write */
        readl(iommu->reg + DMAR_FECTL_REG);
        spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_mask(unsigned int irq)
{
        unsigned long flag;
        struct intel_iommu *iommu = get_irq_data(irq);

        /* mask it */
        spin_lock_irqsave(&iommu->register_lock, flag);
        writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
        /* Read a reg to force flush the post write */
        readl(iommu->reg + DMAR_FECTL_REG);
        spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_write(int irq, struct msi_msg *msg)
{
        struct intel_iommu *iommu = get_irq_data(irq);
        unsigned long flag;

        spin_lock_irqsave(&iommu->register_lock, flag);
        writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
        writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
        writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
        spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_read(int irq, struct msi_msg *msg)
{
        struct intel_iommu *iommu = get_irq_data(irq);
        unsigned long flag;

        spin_lock_irqsave(&iommu->register_lock, flag);
        msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
        msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
        msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
        spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
                u8 fault_reason, u16 source_id, unsigned long long addr)
{
        const char *reason;
        int fault_type;

        reason = dmar_get_fault_reason(fault_reason, &fault_type);

        if (fault_type == INTR_REMAP)
                printk(KERN_ERR "INTR-REMAP: Request device [%02x:%02x.%d] "
                       "fault index %llx\n"
                        "INTR-REMAP:[fault reason %02d] %s\n",
                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
                        PCI_FUNC(source_id & 0xFF), addr >> 48,
                        fault_reason, reason);
        else
                printk(KERN_ERR
                       "DMAR:[%s] Request device [%02x:%02x.%d] "
                       "fault addr %llx\n"
                       "DMAR:[fault reason %02d] %s\n",
                       (type ? "DMA Read" : "DMA Write"),
                       (source_id >> 8), PCI_SLOT(source_id & 0xFF),
                       PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
        return 0;
}

#define PRIMARY_FAULT_REG_LEN (16)
irqreturn_t dmar_fault(int irq, void *dev_id)
{
        struct intel_iommu *iommu = dev_id;
        int reg, fault_index;
        u32 fault_status;
        unsigned long flag;

        spin_lock_irqsave(&iommu->register_lock, flag);
        fault_status = readl(iommu->reg + DMAR_FSTS_REG);
        if (fault_status)
                printk(KERN_ERR "DRHD: handling fault status reg %x\n",
                       fault_status);

        /* TBD: ignore advanced fault log currently */
        if (!(fault_status & DMA_FSTS_PPF))
                goto clear_rest;

        fault_index = dma_fsts_fault_record_index(fault_status);
        reg = cap_fault_reg_offset(iommu->cap);
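        /*
         * Each primary fault record is 16 bytes: the high dword (offset
         * 12) carries the Fault bit, fault reason and request type;
         * offset 8 carries the source-id; the low qword holds the
         * faulting page address.
         */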
        while (1) {
                u8 fault_reason;
                u16 source_id;
                u64 guest_addr;
                int type;
                u32 data;

                /* highest 32 bits */
                data = readl(iommu->reg + reg +
                                fault_index * PRIMARY_FAULT_REG_LEN + 12);
                if (!(data & DMA_FRCD_F))
                        break;

                fault_reason = dma_frcd_fault_reason(data);
                type = dma_frcd_type(data);

                data = readl(iommu->reg + reg +
                                fault_index * PRIMARY_FAULT_REG_LEN + 8);
                source_id = dma_frcd_source_id(data);

                guest_addr = dmar_readq(iommu->reg + reg +
                                fault_index * PRIMARY_FAULT_REG_LEN);
                guest_addr = dma_frcd_page_addr(guest_addr);
                /* clear the fault */
                writel(DMA_FRCD_F, iommu->reg + reg +
                        fault_index * PRIMARY_FAULT_REG_LEN + 12);

                spin_unlock_irqrestore(&iommu->register_lock, flag);

                dmar_fault_do_one(iommu, type, fault_reason,
                                source_id, guest_addr);

                fault_index++;
                if (fault_index >= cap_num_fault_regs(iommu->cap))
                        fault_index = 0;
                spin_lock_irqsave(&iommu->register_lock, flag);
        }
clear_rest:
        /* clear all the other faults */
        fault_status = readl(iommu->reg + DMAR_FSTS_REG);
        writel(fault_status, iommu->reg + DMAR_FSTS_REG);

        spin_unlock_irqrestore(&iommu->register_lock, flag);
        return IRQ_HANDLED;
}

int dmar_set_interrupt(struct intel_iommu *iommu)
{
        int irq, ret;

        /*
         * Check if the fault interrupt is already initialized.
         */
        if (iommu->irq)
                return 0;

        irq = create_irq();
        if (!irq) {
                printk(KERN_ERR "IOMMU: no free vectors\n");
                return -EINVAL;
        }

        set_irq_data(irq, iommu);
        iommu->irq = irq;

        ret = arch_setup_dmar_msi(irq);
        if (ret) {
                set_irq_data(irq, NULL);
                iommu->irq = 0;
                destroy_irq(irq);
                return ret;
        }

        ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
        if (ret)
                printk(KERN_ERR "IOMMU: can't request irq\n");
        return ret;
}

int __init enable_drhd_fault_handling(void)
{
        struct dmar_drhd_unit *drhd;

        /*
         * Enable fault control interrupt.
         */
        for_each_drhd_unit(drhd) {
                int ret;
                struct intel_iommu *iommu = drhd->iommu;
                ret = dmar_set_interrupt(iommu);

                if (ret) {
                        printk(KERN_ERR "DRHD %Lx: failed to enable fault "
                               "interrupt, ret %d\n",
                               (unsigned long long)drhd->reg_base_addr, ret);
                        return -1;
                }
        }

        return 0;
}

/*
 * Re-enable Queued Invalidation interface.
 */
int dmar_reenable_qi(struct intel_iommu *iommu)
{
        if (!ecap_qis(iommu->ecap))
                return -ENOENT;

        if (!iommu->qi)
                return -ENOENT;

        /*
         * First disable queued invalidation.
         */
        dmar_disable_qi(iommu);
        /*
         * Then enable queued invalidation again. Since there are no
         * pending invalidation requests now, it's safe to re-enable
         * queued invalidation.
         */
        __dmar_enable_qi(iommu);

        return 0;
}

/*
 * Check interrupt remapping support in the DMAR table description.
 */
int dmar_ir_support(void)
{
        struct acpi_table_dmar *dmar;

        dmar = (struct acpi_table_dmar *)dmar_tbl;
        if (!dmar)	/* no DMAR table was found/mapped */
                return 0;
        return dmar->flags & 0x1;
}
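
/*
 * Illustrative only, not part of this file: a caller would typically
 * gate interrupt-remapping setup on the checks above, along the lines
 * of:
 *
 *	if (dmar_table_init() == 0 && dmar_ir_support())
 *		... proceed with interrupt-remapping setup ...
 */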