iommu/vt-d: Fix PCI device refcount leak in dmar_dev_scope_init()
[tomoyo/tomoyo-test1.git] drivers/iommu/intel/dmar.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2006, Intel Corporation.
4  *
5  * Copyright (C) 2006-2008 Intel Corporation
6  * Author: Ashok Raj <ashok.raj@intel.com>
7  * Author: Shaohua Li <shaohua.li@intel.com>
8  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
9  *
10  * This file implements early detection/parsing of Remapping Devices
11  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
12  * tables.
13  *
14  * These routines are used by both DMA-remapping and Interrupt-remapping
15  */
16
17 #define pr_fmt(fmt)     "DMAR: " fmt
18
19 #include <linux/pci.h>
20 #include <linux/dmar.h>
21 #include <linux/iova.h>
22 #include <linux/timer.h>
23 #include <linux/irq.h>
24 #include <linux/interrupt.h>
25 #include <linux/tboot.h>
26 #include <linux/dmi.h>
27 #include <linux/slab.h>
28 #include <linux/iommu.h>
29 #include <linux/numa.h>
30 #include <linux/limits.h>
31 #include <asm/irq_remapping.h>
32
33 #include "iommu.h"
34 #include "../irq_remapping.h"
35 #include "perf.h"
36 #include "trace.h"
37
38 typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
39 struct dmar_res_callback {
40         dmar_res_handler_t      cb[ACPI_DMAR_TYPE_RESERVED];
41         void                    *arg[ACPI_DMAR_TYPE_RESERVED];
42         bool                    ignore_unhandled;
43         bool                    print_entry;
44 };
45
46 /*
47  * Assumptions:
48  * 1) The hotplug framework guarantees that a DMAR unit will be hot-added
49  *    before IO devices managed by that unit.
50  * 2) The hotplug framework guarantees that a DMAR unit will be hot-removed
51  *    after IO devices managed by that unit.
52  * 3) Hotplug events are rare.
53  *
54  * Locking rules for DMA and interrupt remapping related global data structures:
55  * 1) Use dmar_global_lock in process context
56  * 2) Use RCU in interrupt context
57  */
58 DECLARE_RWSEM(dmar_global_lock);
59 LIST_HEAD(dmar_drhd_units);
60
61 struct acpi_table_header * __initdata dmar_tbl;
62 static int dmar_dev_scope_status = 1;
63 static DEFINE_IDA(dmar_seq_ids);
64
65 static int alloc_iommu(struct dmar_drhd_unit *drhd);
66 static void free_iommu(struct intel_iommu *iommu);
67
68 static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
69 {
70         /*
71          * Add INCLUDE_ALL units at the tail, so a scan of the list will
72          * find them at the very end.
73          */
74         if (drhd->include_all)
75                 list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
76         else
77                 list_add_rcu(&drhd->list, &dmar_drhd_units);
78 }
79
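/*
 * Count the endpoint, bridge and ACPI namespace entries in the device
 * scope list between @start and @end and allocate a matching array of
 * struct dmar_dev_scope.  *cnt is set to the number of countable entries;
 * NULL is returned when that count is zero or the allocation fails.
 */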
80 void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
81 {
82         struct acpi_dmar_device_scope *scope;
83
84         *cnt = 0;
85         while (start < end) {
86                 scope = start;
87                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
88                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
89                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
90                         (*cnt)++;
91                 else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
92                         scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
93                         pr_warn("Unsupported device scope\n");
94                 }
95                 start += scope->length;
96         }
97         if (*cnt == 0)
98                 return NULL;
99
100         return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
101 }
102
103 void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
104 {
105         int i;
106         struct device *tmp_dev;
107
108         if (*devices && *cnt) {
109                 for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
110                         put_device(tmp_dev);
111                 kfree(*devices);
112         }
113
114         *devices = NULL;
115         *cnt = 0;
116 }
117
118 /* Optimize out kzalloc()/kfree() for normal cases */
119 static char dmar_pci_notify_info_buf[64];
120
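/*
 * Build a dmar_pci_notify_info for @dev.  For device-add events the PCI
 * path from the root bus down to the device is recorded as well.  The
 * small static buffer above is used when the info fits, otherwise it is
 * allocated with kzalloc(); an allocation failure is remembered in
 * dmar_dev_scope_status.
 */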
121 static struct dmar_pci_notify_info *
122 dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
123 {
124         int level = 0;
125         size_t size;
126         struct pci_dev *tmp;
127         struct dmar_pci_notify_info *info;
128
129         BUG_ON(dev->is_virtfn);
130
131         /*
132          * Ignore devices that have a domain number higher than what can
133          * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
134          */
135         if (pci_domain_nr(dev->bus) > U16_MAX)
136                 return NULL;
137
138         /* Only generate path[] for device addition event */
139         if (event == BUS_NOTIFY_ADD_DEVICE)
140                 for (tmp = dev; tmp; tmp = tmp->bus->self)
141                         level++;
142
143         size = struct_size(info, path, level);
144         if (size <= sizeof(dmar_pci_notify_info_buf)) {
145                 info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
146         } else {
147                 info = kzalloc(size, GFP_KERNEL);
148                 if (!info) {
149                         if (dmar_dev_scope_status == 0)
150                                 dmar_dev_scope_status = -ENOMEM;
151                         return NULL;
152                 }
153         }
154
155         info->event = event;
156         info->dev = dev;
157         info->seg = pci_domain_nr(dev->bus);
158         info->level = level;
159         if (event == BUS_NOTIFY_ADD_DEVICE) {
160                 for (tmp = dev; tmp; tmp = tmp->bus->self) {
161                         level--;
162                         info->path[level].bus = tmp->bus->number;
163                         info->path[level].device = PCI_SLOT(tmp->devfn);
164                         info->path[level].function = PCI_FUNC(tmp->devfn);
165                         if (pci_is_root_bus(tmp->bus))
166                                 info->bus = tmp->bus->number;
167                 }
168         }
169
170         return info;
171 }
172
173 static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
174 {
175         if ((void *)info != dmar_pci_notify_info_buf)
176                 kfree(info);
177 }
178
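/*
 * Check whether the PCI path recorded in @info matches the ACPI device
 * scope path starting at @bus.  As a workaround for broken RMRR entries,
 * a single-entry path that names the terminal device directly is also
 * accepted (with a FW_BUG warning).
 */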
179 static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
180                                 struct acpi_dmar_pci_path *path, int count)
181 {
182         int i;
183
184         if (info->bus != bus)
185                 goto fallback;
186         if (info->level != count)
187                 goto fallback;
188
189         for (i = 0; i < count; i++) {
190                 if (path[i].device != info->path[i].device ||
191                     path[i].function != info->path[i].function)
192                         goto fallback;
193         }
194
195         return true;
196
197 fallback:
198
199         if (count != 1)
200                 return false;
201
202         i = info->level - 1;
203         if (bus              == info->path[i].bus &&
204             path[0].device   == info->path[i].device &&
205             path[0].function == info->path[i].function) {
206                 pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
207                         bus, path[0].device, path[0].function);
208                 return true;
209         }
210
211         return false;
212 }
213
214 /* Return: > 0 if a match was found, 0 if no match was found, < 0 on error */
215 int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
216                           void *start, void *end, u16 segment,
217                           struct dmar_dev_scope *devices,
218                           int devices_cnt)
219 {
220         int i, level;
221         struct device *tmp, *dev = &info->dev->dev;
222         struct acpi_dmar_device_scope *scope;
223         struct acpi_dmar_pci_path *path;
224
225         if (segment != info->seg)
226                 return 0;
227
228         for (; start < end; start += scope->length) {
229                 scope = start;
230                 if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
231                     scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
232                         continue;
233
234                 path = (struct acpi_dmar_pci_path *)(scope + 1);
235                 level = (scope->length - sizeof(*scope)) / sizeof(*path);
236                 if (!dmar_match_pci_path(info, scope->bus, path, level))
237                         continue;
238
239                 /*
240                  * We expect devices with endpoint scope to have normal PCI
241                  * headers, and devices with bridge scope to have bridge PCI
242                  * headers.  However PCI NTB devices may be listed in the
243                  * DMAR table with bridge scope, even though they have a
244                  * normal PCI header.  NTB devices are identified by class
245                  * "BRIDGE_OTHER" (0680h) - we don't declare a scope mismatch
246                  * for this special case.
247                  */
248                 if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
249                      info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
250                     (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
251                      (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
252                       info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
253                         pr_warn("Device scope type does not match for %s\n",
254                                 pci_name(info->dev));
255                         return -EINVAL;
256                 }
257
258                 for_each_dev_scope(devices, devices_cnt, i, tmp)
259                         if (tmp == NULL) {
260                                 devices[i].bus = info->dev->bus->number;
261                                 devices[i].devfn = info->dev->devfn;
262                                 rcu_assign_pointer(devices[i].dev,
263                                                    get_device(dev));
264                                 return 1;
265                         }
266                 BUG_ON(i >= devices_cnt);
267         }
268
269         return 0;
270 }
271
272 int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
273                           struct dmar_dev_scope *devices, int count)
274 {
275         int index;
276         struct device *tmp;
277
278         if (info->seg != segment)
279                 return 0;
280
281         for_each_active_dev_scope(devices, count, index, tmp)
282                 if (tmp == &info->dev->dev) {
283                         RCU_INIT_POINTER(devices[index].dev, NULL);
284                         synchronize_rcu();
285                         put_device(tmp);
286                         return 1;
287                 }
288
289         return 0;
290 }
291
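/*
 * Handle a hot-added PCI device: try to insert it into the device scope of
 * each non-INCLUDE_ALL DRHD unit, then notify the DMA remapping code
 * (RMRR/ATSR/SATC scopes) and the interrupt remapping layer.  The first
 * error is recorded in dmar_dev_scope_status.
 */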
292 static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
293 {
294         int ret = 0;
295         struct dmar_drhd_unit *dmaru;
296         struct acpi_dmar_hardware_unit *drhd;
297
298         for_each_drhd_unit(dmaru) {
299                 if (dmaru->include_all)
300                         continue;
301
302                 drhd = container_of(dmaru->hdr,
303                                     struct acpi_dmar_hardware_unit, header);
304                 ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
305                                 ((void *)drhd) + drhd->header.length,
306                                 dmaru->segment,
307                                 dmaru->devices, dmaru->devices_cnt);
308                 if (ret)
309                         break;
310         }
311         if (ret >= 0)
312                 ret = dmar_iommu_notify_scope_dev(info);
313         if (ret < 0 && dmar_dev_scope_status == 0)
314                 dmar_dev_scope_status = ret;
315
316         if (ret >= 0)
317                 intel_irq_remap_add_device(info);
318
319         return ret;
320 }
321
322 static void dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
323 {
324         struct dmar_drhd_unit *dmaru;
325
326         for_each_drhd_unit(dmaru)
327                 if (dmar_remove_dev_scope(info, dmaru->segment,
328                         dmaru->devices, dmaru->devices_cnt))
329                         break;
330         dmar_iommu_notify_scope_dev(info);
331 }
332
333 static inline void vf_inherit_msi_domain(struct pci_dev *pdev)
334 {
335         struct pci_dev *physfn = pci_physfn(pdev);
336
337         dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&physfn->dev));
338 }
339
340 static int dmar_pci_bus_notifier(struct notifier_block *nb,
341                                  unsigned long action, void *data)
342 {
343         struct pci_dev *pdev = to_pci_dev(data);
344         struct dmar_pci_notify_info *info;
345
346         /* Only care about add/remove events for physical functions.
347          * For VFs we actually do the lookup based on the corresponding
348          * PF in device_to_iommu() anyway. */
349         if (pdev->is_virtfn) {
350                 /*
351                  * Ensure that the VF device inherits the irq domain of the
352                  * PF device. Ideally the device would inherit the domain
353                  * from the bus, but DMAR can have multiple units per bus
354                  * which makes this impossible. The VF 'bus' could inherit
355                  * from the PF device, but that's yet another x86-ism to
356                  * inflict on everybody else.
357                  */
358                 if (action == BUS_NOTIFY_ADD_DEVICE)
359                         vf_inherit_msi_domain(pdev);
360                 return NOTIFY_DONE;
361         }
362
363         if (action != BUS_NOTIFY_ADD_DEVICE &&
364             action != BUS_NOTIFY_REMOVED_DEVICE)
365                 return NOTIFY_DONE;
366
367         info = dmar_alloc_pci_notify_info(pdev, action);
368         if (!info)
369                 return NOTIFY_DONE;
370
371         down_write(&dmar_global_lock);
372         if (action == BUS_NOTIFY_ADD_DEVICE)
373                 dmar_pci_bus_add_dev(info);
374         else if (action == BUS_NOTIFY_REMOVED_DEVICE)
375                 dmar_pci_bus_del_dev(info);
376         up_write(&dmar_global_lock);
377
378         dmar_free_pci_notify_info(info);
379
380         return NOTIFY_OK;
381 }
382
383 static struct notifier_block dmar_pci_bus_nb = {
384         .notifier_call = dmar_pci_bus_notifier,
385         .priority = 1,
386 };
387
388 static struct dmar_drhd_unit *
389 dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
390 {
391         struct dmar_drhd_unit *dmaru;
392
393         list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
394                                 dmar_rcu_check())
395                 if (dmaru->segment == drhd->segment &&
396                     dmaru->reg_base_addr == drhd->address)
397                         return dmaru;
398
399         return NULL;
400 }
401
402 /*
403  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
404  * structure which uniquely represents one DMA remapping hardware unit
405  * present in the platform
406  */
407 static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
408 {
409         struct acpi_dmar_hardware_unit *drhd;
410         struct dmar_drhd_unit *dmaru;
411         int ret;
412
413         drhd = (struct acpi_dmar_hardware_unit *)header;
414         dmaru = dmar_find_dmaru(drhd);
415         if (dmaru)
416                 goto out;
417
418         dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
419         if (!dmaru)
420                 return -ENOMEM;
421
422         /*
423          * If header is allocated from slab by ACPI _DSM method, we need to
424          * copy the content because the memory buffer will be freed on return.
425          */
426         dmaru->hdr = (void *)(dmaru + 1);
427         memcpy(dmaru->hdr, header, header->length);
428         dmaru->reg_base_addr = drhd->address;
429         dmaru->segment = drhd->segment;
430         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
431         dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
432                                               ((void *)drhd) + drhd->header.length,
433                                               &dmaru->devices_cnt);
434         if (dmaru->devices_cnt && dmaru->devices == NULL) {
435                 kfree(dmaru);
436                 return -ENOMEM;
437         }
438
439         ret = alloc_iommu(dmaru);
440         if (ret) {
441                 dmar_free_dev_scope(&dmaru->devices,
442                                     &dmaru->devices_cnt);
443                 kfree(dmaru);
444                 return ret;
445         }
446         dmar_register_drhd_unit(dmaru);
447
448 out:
449         if (arg)
450                 (*(int *)arg)++;
451
452         return 0;
453 }
454
455 static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
456 {
457         if (dmaru->devices && dmaru->devices_cnt)
458                 dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
459         if (dmaru->iommu)
460                 free_iommu(dmaru->iommu);
461         kfree(dmaru);
462 }
463
464 static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
465                                       void *arg)
466 {
467         struct acpi_dmar_andd *andd = (void *)header;
468
469         /* Check for NUL termination within the designated length */
470         if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
471                 pr_warn(FW_BUG
472                            "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
473                            "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
474                            dmi_get_system_info(DMI_BIOS_VENDOR),
475                            dmi_get_system_info(DMI_BIOS_VERSION),
476                            dmi_get_system_info(DMI_PRODUCT_VERSION));
477                 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
478                 return -EINVAL;
479         }
480         pr_info("ANDD device: %x name: %s\n", andd->device_number,
481                 andd->device_name);
482
483         return 0;
484 }
485
486 #ifdef CONFIG_ACPI_NUMA
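/*
 * An RHSA structure associates a DRHD unit with a NUMA proximity domain.
 * Record the node on the matching IOMMU, or warn (and taint) when firmware
 * references a DRHD register base that does not exist.
 */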
487 static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
488 {
489         struct acpi_dmar_rhsa *rhsa;
490         struct dmar_drhd_unit *drhd;
491
492         rhsa = (struct acpi_dmar_rhsa *)header;
493         for_each_drhd_unit(drhd) {
494                 if (drhd->reg_base_addr == rhsa->base_address) {
495                         int node = pxm_to_node(rhsa->proximity_domain);
496
497                         if (node != NUMA_NO_NODE && !node_online(node))
498                                 node = NUMA_NO_NODE;
499                         drhd->iommu->node = node;
500                         return 0;
501                 }
502         }
503         pr_warn(FW_BUG
504                 "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
505                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
506                 rhsa->base_address,
507                 dmi_get_system_info(DMI_BIOS_VENDOR),
508                 dmi_get_system_info(DMI_BIOS_VERSION),
509                 dmi_get_system_info(DMI_PRODUCT_VERSION));
510         add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
511
512         return 0;
513 }
514 #else
515 #define dmar_parse_one_rhsa             dmar_res_noop
516 #endif
517
518 static void
519 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
520 {
521         struct acpi_dmar_hardware_unit *drhd;
522         struct acpi_dmar_reserved_memory *rmrr;
523         struct acpi_dmar_atsr *atsr;
524         struct acpi_dmar_rhsa *rhsa;
525         struct acpi_dmar_satc *satc;
526
527         switch (header->type) {
528         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
529                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
530                                     header);
531                 pr_info("DRHD base: %#016Lx flags: %#x\n",
532                         (unsigned long long)drhd->address, drhd->flags);
533                 break;
534         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
535                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
536                                     header);
537                 pr_info("RMRR base: %#016Lx end: %#016Lx\n",
538                         (unsigned long long)rmrr->base_address,
539                         (unsigned long long)rmrr->end_address);
540                 break;
541         case ACPI_DMAR_TYPE_ROOT_ATS:
542                 atsr = container_of(header, struct acpi_dmar_atsr, header);
543                 pr_info("ATSR flags: %#x\n", atsr->flags);
544                 break;
545         case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
546                 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
547                 pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
548                        (unsigned long long)rhsa->base_address,
549                        rhsa->proximity_domain);
550                 break;
551         case ACPI_DMAR_TYPE_NAMESPACE:
552                 /* We don't print this here because we need to sanity-check
553                    it first. So print it in dmar_parse_one_andd() instead. */
554                 break;
555         case ACPI_DMAR_TYPE_SATC:
556                 satc = container_of(header, struct acpi_dmar_satc, header);
557                 pr_info("SATC flags: 0x%x\n", satc->flags);
558                 break;
559         }
560 }
561
562 /**
563  * dmar_table_detect - checks to see if the platform supports DMAR devices
564  */
565 static int __init dmar_table_detect(void)
566 {
567         acpi_status status = AE_OK;
568
569         /* if we can find the DMAR table, then there are DMAR devices */
570         status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
571
572         if (ACPI_SUCCESS(status) && !dmar_tbl) {
573                 pr_warn("Unable to map DMAR\n");
574                 status = AE_NOT_FOUND;
575         }
576
577         return ACPI_SUCCESS(status) ? 0 : -ENOENT;
578 }
579
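/*
 * Walk the remapping structures in [@start, @start + @len), optionally
 * printing each entry, and dispatch it to the handler registered for its
 * type in @cb.  Unknown types are skipped for forward compatibility; a
 * zero-length record stops the walk and a record running past the table
 * end fails it with -EINVAL.
 */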
580 static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
581                                        size_t len, struct dmar_res_callback *cb)
582 {
583         struct acpi_dmar_header *iter, *next;
584         struct acpi_dmar_header *end = ((void *)start) + len;
585
586         for (iter = start; iter < end; iter = next) {
587                 next = (void *)iter + iter->length;
588                 if (iter->length == 0) {
589                         /* Avoid looping forever on bad ACPI tables */
590                         pr_debug(FW_BUG "Invalid 0-length structure\n");
591                         break;
592                 } else if (next > end) {
593                         /* Avoid passing table end */
594                         pr_warn(FW_BUG "Record passes table end\n");
595                         return -EINVAL;
596                 }
597
598                 if (cb->print_entry)
599                         dmar_table_print_dmar_entry(iter);
600
601                 if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
602                         /* continue for forward compatibility */
603                         pr_debug("Unknown DMAR structure type %d\n",
604                                  iter->type);
605                 } else if (cb->cb[iter->type]) {
606                         int ret;
607
608                         ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
609                         if (ret)
610                                 return ret;
611                 } else if (!cb->ignore_unhandled) {
612                         pr_warn("No handler for DMAR structure type %d\n",
613                                 iter->type);
614                         return -EINVAL;
615                 }
616         }
617
618         return 0;
619 }
620
621 static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
622                                        struct dmar_res_callback *cb)
623 {
624         return dmar_walk_remapping_entries((void *)(dmar + 1),
625                         dmar->header.length - sizeof(*dmar), cb);
626 }
627
628 /**
629  * parse_dmar_table - parses the DMA reporting table
630  */
631 static int __init
632 parse_dmar_table(void)
633 {
634         struct acpi_table_dmar *dmar;
635         int drhd_count = 0;
636         int ret;
637         struct dmar_res_callback cb = {
638                 .print_entry = true,
639                 .ignore_unhandled = true,
640                 .arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
641                 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
642                 .cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
643                 .cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
644                 .cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
645                 .cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
646                 .cb[ACPI_DMAR_TYPE_SATC] = &dmar_parse_one_satc,
647         };
648
649         /*
650          * Do it again; the earlier dmar_tbl mapping could have been done
651          * with a fixed map.
652          */
653         dmar_table_detect();
654
655         /*
656          * ACPI tables may not be DMA protected by tboot, so use the DMAR copy
657          * that SINIT saved in SinitMleData in the TXT heap (which is DMA protected)
658          */
659         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
660
661         dmar = (struct acpi_table_dmar *)dmar_tbl;
662         if (!dmar)
663                 return -ENODEV;
664
665         if (dmar->width < PAGE_SHIFT - 1) {
666                 pr_warn("Invalid DMAR haw\n");
667                 return -EINVAL;
668         }
669
670         pr_info("Host address width %d\n", dmar->width + 1);
671         ret = dmar_walk_dmar_table(dmar, &cb);
672         if (ret == 0 && drhd_count == 0)
673                 pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
674
675         return ret;
676 }
677
678 static int dmar_pci_device_match(struct dmar_dev_scope devices[],
679                                  int cnt, struct pci_dev *dev)
680 {
681         int index;
682         struct device *tmp;
683
684         while (dev) {
685                 for_each_active_dev_scope(devices, cnt, index, tmp)
686                         if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
687                                 return 1;
688
689                 /* Check our parent */
690                 dev = dev->bus->self;
691         }
692
693         return 0;
694 }
695
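/*
 * Return the DRHD unit whose device scope covers @dev (VFs are resolved
 * through their PF).  INCLUDE_ALL units are kept at the tail of the list,
 * so explicit device scopes are matched first.
 */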
696 struct dmar_drhd_unit *
697 dmar_find_matched_drhd_unit(struct pci_dev *dev)
698 {
699         struct dmar_drhd_unit *dmaru;
700         struct acpi_dmar_hardware_unit *drhd;
701
702         dev = pci_physfn(dev);
703
704         rcu_read_lock();
705         for_each_drhd_unit(dmaru) {
706                 drhd = container_of(dmaru->hdr,
707                                     struct acpi_dmar_hardware_unit,
708                                     header);
709
710                 if (dmaru->include_all &&
711                     drhd->segment == pci_domain_nr(dev->bus))
712                         goto out;
713
714                 if (dmar_pci_device_match(dmaru->devices,
715                                           dmaru->devices_cnt, dev))
716                         goto out;
717         }
718         dmaru = NULL;
719 out:
720         rcu_read_unlock();
721
722         return dmaru;
723 }
724
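/*
 * Bind the ACPI companion device of an ANDD entry to the DRHD unit whose
 * scope lists a NAMESPACE entry with the matching enumeration ID, using
 * the first free slot in that unit's device array.
 */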
725 static void __init dmar_acpi_insert_dev_scope(u8 device_number,
726                                               struct acpi_device *adev)
727 {
728         struct dmar_drhd_unit *dmaru;
729         struct acpi_dmar_hardware_unit *drhd;
730         struct acpi_dmar_device_scope *scope;
731         struct device *tmp;
732         int i;
733         struct acpi_dmar_pci_path *path;
734
735         for_each_drhd_unit(dmaru) {
736                 drhd = container_of(dmaru->hdr,
737                                     struct acpi_dmar_hardware_unit,
738                                     header);
739
740                 for (scope = (void *)(drhd + 1);
741                      (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
742                      scope = ((void *)scope) + scope->length) {
743                         if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
744                                 continue;
745                         if (scope->enumeration_id != device_number)
746                                 continue;
747
748                         path = (void *)(scope + 1);
749                         pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
750                                 dev_name(&adev->dev), dmaru->reg_base_addr,
751                                 scope->bus, path->device, path->function);
752                         for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
753                                 if (tmp == NULL) {
754                                         dmaru->devices[i].bus = scope->bus;
755                                         dmaru->devices[i].devfn = PCI_DEVFN(path->device,
756                                                                             path->function);
757                                         rcu_assign_pointer(dmaru->devices[i].dev,
758                                                            get_device(&adev->dev));
759                                         return;
760                                 }
761                         BUG_ON(i >= dmaru->devices_cnt);
762                 }
763         }
764         pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
765                 device_number, dev_name(&adev->dev));
766 }
767
768 static int __init dmar_acpi_dev_scope_init(void)
769 {
770         struct acpi_dmar_andd *andd;
771
772         if (dmar_tbl == NULL)
773                 return -ENODEV;
774
775         for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
776              ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
777              andd = ((void *)andd) + andd->header.length) {
778                 if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
779                         acpi_handle h;
780                         struct acpi_device *adev;
781
782                         if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
783                                                           andd->device_name,
784                                                           &h))) {
785                                 pr_err("Failed to find handle for ACPI object %s\n",
786                                        andd->device_name);
787                                 continue;
788                         }
789                         adev = acpi_fetch_acpi_dev(h);
790                         if (!adev) {
791                                 pr_err("Failed to get device for ACPI object %s\n",
792                                        andd->device_name);
793                                 continue;
794                         }
795                         dmar_acpi_insert_dev_scope(andd->device_number, adev);
796                 }
797         }
798         return 0;
799 }
800
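/*
 * Populate the device scopes once the DRHD units have been parsed: bind
 * ACPI namespace devices first, then treat every already-enumerated
 * physical PCI device as a hot-add event.  The result is cached in
 * dmar_dev_scope_status for subsequent callers.
 */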
801 int __init dmar_dev_scope_init(void)
802 {
803         struct pci_dev *dev = NULL;
804         struct dmar_pci_notify_info *info;
805
806         if (dmar_dev_scope_status != 1)
807                 return dmar_dev_scope_status;
808
809         if (list_empty(&dmar_drhd_units)) {
810                 dmar_dev_scope_status = -ENODEV;
811         } else {
812                 dmar_dev_scope_status = 0;
813
814                 dmar_acpi_dev_scope_init();
815
816                 for_each_pci_dev(dev) {
817                         if (dev->is_virtfn)
818                                 continue;
819
820                         info = dmar_alloc_pci_notify_info(dev,
821                                         BUS_NOTIFY_ADD_DEVICE);
822                         if (!info) {
823                                 pci_dev_put(dev);
824                                 return dmar_dev_scope_status;
825                         } else {
826                                 dmar_pci_bus_add_dev(info);
827                                 dmar_free_pci_notify_info(info);
828                         }
829                 }
830         }
831
832         return dmar_dev_scope_status;
833 }
834
835 void __init dmar_register_bus_notifier(void)
836 {
837         bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
838 }
839
840
841 int __init dmar_table_init(void)
842 {
843         static int dmar_table_initialized;
844         int ret;
845
846         if (dmar_table_initialized == 0) {
847                 ret = parse_dmar_table();
848                 if (ret < 0) {
849                         if (ret != -ENODEV)
850                                 pr_info("Parse DMAR table failure.\n");
851                 } else  if (list_empty(&dmar_drhd_units)) {
852                         pr_info("No DMAR devices found\n");
853                         ret = -ENODEV;
854                 }
855
856                 if (ret < 0)
857                         dmar_table_initialized = ret;
858                 else
859                         dmar_table_initialized = 1;
860         }
861
862         return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
863 }
864
865 static void warn_invalid_dmar(u64 addr, const char *message)
866 {
867         pr_warn_once(FW_BUG
868                 "Your BIOS is broken; DMAR reported at address %llx%s!\n"
869                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
870                 addr, message,
871                 dmi_get_system_info(DMI_BIOS_VENDOR),
872                 dmi_get_system_info(DMI_BIOS_VERSION),
873                 dmi_get_system_info(DMI_PRODUCT_VERSION));
874         add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
875 }
876
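/*
 * Sanity-check a single DRHD entry during early detection: map its register
 * page (with ioremap() or early_ioremap() depending on @arg) and reject
 * units whose base address is zero or whose capability registers read back
 * as all ones.
 */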
877 static int __ref
878 dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
879 {
880         struct acpi_dmar_hardware_unit *drhd;
881         void __iomem *addr;
882         u64 cap, ecap;
883
884         drhd = (void *)entry;
885         if (!drhd->address) {
886                 warn_invalid_dmar(0, "");
887                 return -EINVAL;
888         }
889
890         if (arg)
891                 addr = ioremap(drhd->address, VTD_PAGE_SIZE);
892         else
893                 addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
894         if (!addr) {
895                 pr_warn("Can't validate DRHD address: %llx\n", drhd->address);
896                 return -EINVAL;
897         }
898
899         cap = dmar_readq(addr + DMAR_CAP_REG);
900         ecap = dmar_readq(addr + DMAR_ECAP_REG);
901
902         if (arg)
903                 iounmap(addr);
904         else
905                 early_iounmap(addr, VTD_PAGE_SIZE);
906
907         if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
908                 warn_invalid_dmar(drhd->address, " returns all ones");
909                 return -EINVAL;
910         }
911
912         return 0;
913 }
914
915 void __init detect_intel_iommu(void)
916 {
917         int ret;
918         struct dmar_res_callback validate_drhd_cb = {
919                 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
920                 .ignore_unhandled = true,
921         };
922
923         down_write(&dmar_global_lock);
924         ret = dmar_table_detect();
925         if (!ret)
926                 ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
927                                            &validate_drhd_cb);
928         if (!ret && !no_iommu && !iommu_detected &&
929             (!dmar_disabled || dmar_platform_optin())) {
930                 iommu_detected = 1;
931                 /* Make sure ACS will be enabled */
932                 pci_request_acs();
933         }
934
935 #ifdef CONFIG_X86
936         if (!ret) {
937                 x86_init.iommu.iommu_init = intel_iommu_init;
938                 x86_platform.iommu_shutdown = intel_iommu_shutdown;
939         }
940
941 #endif
942
943         if (dmar_tbl) {
944                 acpi_put_table(dmar_tbl);
945                 dmar_tbl = NULL;
946         }
947         up_write(&dmar_global_lock);
948 }
949
950 static void unmap_iommu(struct intel_iommu *iommu)
951 {
952         iounmap(iommu->reg);
953         release_mem_region(iommu->reg_phys, iommu->reg_size);
954 }
955
956 /**
957  * map_iommu: map the iommu's registers
958  * @iommu: the iommu to map
959  * @phys_addr: the physical address of the base register
960  *
961  * Memory map the iommu's registers.  Start with a single page, and
962  * possibly expand if that turns out to be insufficient.
963  */
964 static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
965 {
966         int map_size, err = 0;
967
968         iommu->reg_phys = phys_addr;
969         iommu->reg_size = VTD_PAGE_SIZE;
970
971         if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
972                 pr_err("Can't reserve memory\n");
973                 err = -EBUSY;
974                 goto out;
975         }
976
977         iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
978         if (!iommu->reg) {
979                 pr_err("Can't map the region\n");
980                 err = -ENOMEM;
981                 goto release;
982         }
983
984         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
985         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
986
987         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
988                 err = -EINVAL;
989                 warn_invalid_dmar(phys_addr, " returns all ones");
990                 goto unmap;
991         }
992         if (ecap_vcs(iommu->ecap))
993                 iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
994
995         /* the registers might be more than one page */
996         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
997                          cap_max_fault_reg_offset(iommu->cap));
998         map_size = VTD_PAGE_ALIGN(map_size);
999         if (map_size > iommu->reg_size) {
1000                 iounmap(iommu->reg);
1001                 release_mem_region(iommu->reg_phys, iommu->reg_size);
1002                 iommu->reg_size = map_size;
1003                 if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
1004                                         iommu->name)) {
1005                         pr_err("Can't reserve memory\n");
1006                         err = -EBUSY;
1007                         goto out;
1008                 }
1009                 iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
1010                 if (!iommu->reg) {
1011                         pr_err("Can't map the region\n");
1012                         err = -ENOMEM;
1013                         goto release;
1014                 }
1015         }
1016         err = 0;
1017         goto out;
1018
1019 unmap:
1020         iounmap(iommu->reg);
1021 release:
1022         release_mem_region(iommu->reg_phys, iommu->reg_size);
1023 out:
1024         return err;
1025 }
1026
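/*
 * Allocate and set up the intel_iommu instance for @drhd: assign a sequence
 * id, map the register space, compute the supported address widths and
 * mirror the current hardware status into gcmd.  When called for hotplug
 * (intel_iommu_enabled already set), the unit is also registered with sysfs
 * and the IOMMU core.
 */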
1027 static int alloc_iommu(struct dmar_drhd_unit *drhd)
1028 {
1029         struct intel_iommu *iommu;
1030         u32 ver, sts;
1031         int agaw = -1;
1032         int msagaw = -1;
1033         int err;
1034
1035         if (!drhd->reg_base_addr) {
1036                 warn_invalid_dmar(0, "");
1037                 return -EINVAL;
1038         }
1039
1040         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
1041         if (!iommu)
1042                 return -ENOMEM;
1043
1044         iommu->seq_id = ida_alloc_range(&dmar_seq_ids, 0,
1045                                         DMAR_UNITS_SUPPORTED - 1, GFP_KERNEL);
1046         if (iommu->seq_id < 0) {
1047                 pr_err("Failed to allocate seq_id\n");
1048                 err = iommu->seq_id;
1049                 goto error;
1050         }
1051         sprintf(iommu->name, "dmar%d", iommu->seq_id);
1052
1053         err = map_iommu(iommu, drhd->reg_base_addr);
1054         if (err) {
1055                 pr_err("Failed to map %s\n", iommu->name);
1056                 goto error_free_seq_id;
1057         }
1058
1059         err = -EINVAL;
1060         if (cap_sagaw(iommu->cap) == 0) {
1061                 pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
1062                         iommu->name);
1063                 drhd->ignored = 1;
1064         }
1065
1066         if (!drhd->ignored) {
1067                 agaw = iommu_calculate_agaw(iommu);
1068                 if (agaw < 0) {
1069                         pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
1070                                iommu->seq_id);
1071                         drhd->ignored = 1;
1072                 }
1073         }
1074         if (!drhd->ignored) {
1075                 msagaw = iommu_calculate_max_sagaw(iommu);
1076                 if (msagaw < 0) {
1077                         pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
1078                                iommu->seq_id);
1079                         drhd->ignored = 1;
1080                         agaw = -1;
1081                 }
1082         }
1083         iommu->agaw = agaw;
1084         iommu->msagaw = msagaw;
1085         iommu->segment = drhd->segment;
1086
1087         iommu->node = NUMA_NO_NODE;
1088
1089         ver = readl(iommu->reg + DMAR_VER_REG);
1090         pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
1091                 iommu->name,
1092                 (unsigned long long)drhd->reg_base_addr,
1093                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1094                 (unsigned long long)iommu->cap,
1095                 (unsigned long long)iommu->ecap);
1096
1097         /* Reflect status in gcmd */
1098         sts = readl(iommu->reg + DMAR_GSTS_REG);
1099         if (sts & DMA_GSTS_IRES)
1100                 iommu->gcmd |= DMA_GCMD_IRE;
1101         if (sts & DMA_GSTS_TES)
1102                 iommu->gcmd |= DMA_GCMD_TE;
1103         if (sts & DMA_GSTS_QIES)
1104                 iommu->gcmd |= DMA_GCMD_QIE;
1105
1106         raw_spin_lock_init(&iommu->register_lock);
1107
1108         /*
1109          * This is only for hotplug; at boot time intel_iommu_enabled won't
1110          * be set yet. When intel_iommu_init() runs, it registers the units
1111          * present at boot time, then sets intel_iommu_enabled.
1112          */
1113         if (intel_iommu_enabled && !drhd->ignored) {
1114                 err = iommu_device_sysfs_add(&iommu->iommu, NULL,
1115                                              intel_iommu_groups,
1116                                              "%s", iommu->name);
1117                 if (err)
1118                         goto err_unmap;
1119
1120                 err = iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
1121                 if (err)
1122                         goto err_sysfs;
1123         }
1124
1125         drhd->iommu = iommu;
1126         iommu->drhd = drhd;
1127
1128         return 0;
1129
1130 err_sysfs:
1131         iommu_device_sysfs_remove(&iommu->iommu);
1132 err_unmap:
1133         unmap_iommu(iommu);
1134 error_free_seq_id:
1135         ida_free(&dmar_seq_ids, iommu->seq_id);
1136 error:
1137         kfree(iommu);
1138         return err;
1139 }
1140
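/*
 * Tear down an IOMMU created by alloc_iommu(): unregister it from the IOMMU
 * core, release its interrupts and invalidation queue, unmap the registers
 * and free the sequence id.
 */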
1141 static void free_iommu(struct intel_iommu *iommu)
1142 {
1143         if (intel_iommu_enabled && !iommu->drhd->ignored) {
1144                 iommu_device_unregister(&iommu->iommu);
1145                 iommu_device_sysfs_remove(&iommu->iommu);
1146         }
1147
1148         if (iommu->irq) {
1149                 if (iommu->pr_irq) {
1150                         free_irq(iommu->pr_irq, iommu);
1151                         dmar_free_hwirq(iommu->pr_irq);
1152                         iommu->pr_irq = 0;
1153                 }
1154                 free_irq(iommu->irq, iommu);
1155                 dmar_free_hwirq(iommu->irq);
1156                 iommu->irq = 0;
1157         }
1158
1159         if (iommu->qi) {
1160                 free_page((unsigned long)iommu->qi->desc);
1161                 kfree(iommu->qi->desc_status);
1162                 kfree(iommu->qi);
1163         }
1164
1165         if (iommu->reg)
1166                 unmap_iommu(iommu);
1167
1168         ida_free(&dmar_seq_ids, iommu->seq_id);
1169         kfree(iommu);
1170 }
1171
1172 /*
1173  * Reclaim all the submitted descriptors which have completed their work.
1174  */
1175 static inline void reclaim_free_desc(struct q_inval *qi)
1176 {
1177         while (qi->desc_status[qi->free_tail] == QI_DONE ||
1178                qi->desc_status[qi->free_tail] == QI_ABORT) {
1179                 qi->desc_status[qi->free_tail] = QI_FREE;
1180                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1181                 qi->free_cnt++;
1182         }
1183 }
1184
1185 static const char *qi_type_string(u8 type)
1186 {
1187         switch (type) {
1188         case QI_CC_TYPE:
1189                 return "Context-cache Invalidation";
1190         case QI_IOTLB_TYPE:
1191                 return "IOTLB Invalidation";
1192         case QI_DIOTLB_TYPE:
1193                 return "Device-TLB Invalidation";
1194         case QI_IEC_TYPE:
1195                 return "Interrupt Entry Cache Invalidation";
1196         case QI_IWD_TYPE:
1197                 return "Invalidation Wait";
1198         case QI_EIOTLB_TYPE:
1199                 return "PASID-based IOTLB Invalidation";
1200         case QI_PC_TYPE:
1201                 return "PASID-cache Invalidation";
1202         case QI_DEIOTLB_TYPE:
1203                 return "PASID-based Device-TLB Invalidation";
1204         case QI_PGRP_RESP_TYPE:
1205                 return "Page Group Response";
1206         default:
1207                 return "UNKNOWN";
1208         }
1209 }
1210
1211 static void qi_dump_fault(struct intel_iommu *iommu, u32 fault)
1212 {
1213         unsigned int head = dmar_readl(iommu->reg + DMAR_IQH_REG);
1214         u64 iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG);
1215         struct qi_desc *desc = iommu->qi->desc + head;
1216
1217         if (fault & DMA_FSTS_IQE)
1218                 pr_err("VT-d detected Invalidation Queue Error: Reason %llx",
1219                        DMAR_IQER_REG_IQEI(iqe_err));
1220         if (fault & DMA_FSTS_ITE)
1221                 pr_err("VT-d detected Invalidation Time-out Error: SID %llx",
1222                        DMAR_IQER_REG_ITESID(iqe_err));
1223         if (fault & DMA_FSTS_ICE)
1224                 pr_err("VT-d detected Invalidation Completion Error: SID %llx",
1225                        DMAR_IQER_REG_ICESID(iqe_err));
1226
1227         pr_err("QI HEAD: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
1228                qi_type_string(desc->qw0 & 0xf),
1229                (unsigned long long)desc->qw0,
1230                (unsigned long long)desc->qw1);
1231
1232         head = ((head >> qi_shift(iommu)) + QI_LENGTH - 1) % QI_LENGTH;
1233         head <<= qi_shift(iommu);
1234         desc = iommu->qi->desc + head;
1235
1236         pr_err("QI PRIOR: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
1237                qi_type_string(desc->qw0 & 0xf),
1238                (unsigned long long)desc->qw0,
1239                (unsigned long long)desc->qw1);
1240 }
1241
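/*
 * Inspect the fault status register and recover from invalidation queue
 * errors: on IQE the faulting descriptor is overwritten with the wait
 * descriptor and -EINVAL is returned; on ITE the pending descriptors are
 * marked aborted and -EAGAIN is returned if the wait descriptor was among
 * them; ICE is simply cleared.
 */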
1242 static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
1243 {
1244         u32 fault;
1245         int head, tail;
1246         struct q_inval *qi = iommu->qi;
1247         int shift = qi_shift(iommu);
1248
1249         if (qi->desc_status[wait_index] == QI_ABORT)
1250                 return -EAGAIN;
1251
1252         fault = readl(iommu->reg + DMAR_FSTS_REG);
1253         if (fault & (DMA_FSTS_IQE | DMA_FSTS_ITE | DMA_FSTS_ICE))
1254                 qi_dump_fault(iommu, fault);
1255
1256         /*
1257          * If IQE happens, the head points to the descriptor associated
1258          * with the error. No new descriptors are fetched until the IQE
1259          * is cleared.
1260          */
1261         if (fault & DMA_FSTS_IQE) {
1262                 head = readl(iommu->reg + DMAR_IQH_REG);
1263                 if ((head >> shift) == index) {
1264                         struct qi_desc *desc = qi->desc + head;
1265
1266                         /*
1267                          * desc->qw2 and desc->qw3 are either reserved or
1268                          * used by software as private data. We won't print
1269                          * out these two qw's for security consideration.
1270                          */
1271                         memcpy(desc, qi->desc + (wait_index << shift),
1272                                1 << shift);
1273                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
1274                         pr_info("Invalidation Queue Error (IQE) cleared\n");
1275                         return -EINVAL;
1276                 }
1277         }
1278
1279         /*
1280          * If ITE happens, all pending wait_desc commands are aborted.
1281          * No new descriptors are fetched until the ITE is cleared.
1282          */
1283         if (fault & DMA_FSTS_ITE) {
1284                 head = readl(iommu->reg + DMAR_IQH_REG);
1285                 head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1286                 head |= 1;
1287                 tail = readl(iommu->reg + DMAR_IQT_REG);
1288                 tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1289
1290                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
1291                 pr_info("Invalidation Time-out Error (ITE) cleared\n");
1292
1293                 do {
1294                         if (qi->desc_status[head] == QI_IN_USE)
1295                                 qi->desc_status[head] = QI_ABORT;
1296                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
1297                 } while (head != tail);
1298
1299                 if (qi->desc_status[wait_index] == QI_ABORT)
1300                         return -EAGAIN;
1301         }
1302
1303         if (fault & DMA_FSTS_ICE) {
1304                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
1305                 pr_info("Invalidation Completion Error (ICE) cleared\n");
1306         }
1307
1308         return 0;
1309 }
1310
1311 /*
1312  * Function to submit invalidation descriptors of all types to the queued
1313  * invalidation interface (QI). Multiple descriptors can be submitted at a
1314  * time; a wait descriptor is appended to each submission to ensure that the
1315  * hardware has completed the invalidation before returning. Wait descriptors
1316  * can be part of the submission, but they will not be polled for completion.
1317  */
1318 int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
1319                    unsigned int count, unsigned long options)
1320 {
1321         struct q_inval *qi = iommu->qi;
1322         s64 devtlb_start_ktime = 0;
1323         s64 iotlb_start_ktime = 0;
1324         s64 iec_start_ktime = 0;
1325         struct qi_desc wait_desc;
1326         int wait_index, index;
1327         unsigned long flags;
1328         int offset, shift;
1329         int rc, i;
1330         u64 type;
1331
1332         if (!qi)
1333                 return 0;
1334
1335         type = desc->qw0 & GENMASK_ULL(3, 0);
1336
1337         if ((type == QI_IOTLB_TYPE || type == QI_EIOTLB_TYPE) &&
1338             dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IOTLB))
1339                 iotlb_start_ktime = ktime_to_ns(ktime_get());
1340
1341         if ((type == QI_DIOTLB_TYPE || type == QI_DEIOTLB_TYPE) &&
1342             dmar_latency_enabled(iommu, DMAR_LATENCY_INV_DEVTLB))
1343                 devtlb_start_ktime = ktime_to_ns(ktime_get());
1344
1345         if (type == QI_IEC_TYPE &&
1346             dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IEC))
1347                 iec_start_ktime = ktime_to_ns(ktime_get());
1348
1349 restart:
1350         rc = 0;
1351
1352         raw_spin_lock_irqsave(&qi->q_lock, flags);
1353         /*
1354          * Check if we have enough empty slots in the queue to submit;
1355          * the calculation is based on:
1356          * # of desc + 1 wait desc + 1 space between head and tail
1357          */
1358         while (qi->free_cnt < count + 2) {
1359                 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1360                 cpu_relax();
1361                 raw_spin_lock_irqsave(&qi->q_lock, flags);
1362         }
1363
1364         index = qi->free_head;
1365         wait_index = (index + count) % QI_LENGTH;
1366         shift = qi_shift(iommu);
1367
1368         for (i = 0; i < count; i++) {
1369                 offset = ((index + i) % QI_LENGTH) << shift;
1370                 memcpy(qi->desc + offset, &desc[i], 1 << shift);
1371                 qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
1372                 trace_qi_submit(iommu, desc[i].qw0, desc[i].qw1,
1373                                 desc[i].qw2, desc[i].qw3);
1374         }
1375         qi->desc_status[wait_index] = QI_IN_USE;
1376
1377         wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
1378                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
1379         if (options & QI_OPT_WAIT_DRAIN)
1380                 wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
1381         wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
1382         wait_desc.qw2 = 0;
1383         wait_desc.qw3 = 0;
1384
1385         offset = wait_index << shift;
1386         memcpy(qi->desc + offset, &wait_desc, 1 << shift);
1387
1388         qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
1389         qi->free_cnt -= count + 1;
1390
1391         /*
1392          * update the HW tail register indicating the presence of
1393          * new descriptors.
1394          */
1395         writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
1396
1397         while (qi->desc_status[wait_index] != QI_DONE) {
1398                 /*
1399                  * We will leave the interrupts disabled, to prevent the interrupt
1400                  * context from queueing another cmd while a cmd is already submitted
1401                  * and waiting for completion on this CPU. This is to avoid
1402                  * a deadlock where the interrupt context can wait indefinitely
1403                  * for free slots in the queue.
1404                  */
1405                 rc = qi_check_fault(iommu, index, wait_index);
1406                 if (rc)
1407                         break;
1408
1409                 raw_spin_unlock(&qi->q_lock);
1410                 cpu_relax();
1411                 raw_spin_lock(&qi->q_lock);
1412         }
1413
1414         for (i = 0; i < count; i++)
1415                 qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
1416
1417         reclaim_free_desc(qi);
1418         raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1419
1420         if (rc == -EAGAIN)
1421                 goto restart;
1422
1423         if (iotlb_start_ktime)
1424                 dmar_latency_update(iommu, DMAR_LATENCY_INV_IOTLB,
1425                                 ktime_to_ns(ktime_get()) - iotlb_start_ktime);
1426
1427         if (devtlb_start_ktime)
1428                 dmar_latency_update(iommu, DMAR_LATENCY_INV_DEVTLB,
1429                                 ktime_to_ns(ktime_get()) - devtlb_start_ktime);
1430
1431         if (iec_start_ktime)
1432                 dmar_latency_update(iommu, DMAR_LATENCY_INV_IEC,
1433                                 ktime_to_ns(ktime_get()) - iec_start_ktime);
1434
1435         return rc;
1436 }
1437
1438 /*
1439  * Flush the global interrupt entry cache.
1440  */
1441 void qi_global_iec(struct intel_iommu *iommu)
1442 {
1443         struct qi_desc desc;
1444
1445         desc.qw0 = QI_IEC_TYPE;
1446         desc.qw1 = 0;
1447         desc.qw2 = 0;
1448         desc.qw3 = 0;
1449
1450         /* should never fail */
1451         qi_submit_sync(iommu, &desc, 1, 0);
1452 }
1453
1454 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1455                       u64 type)
1456 {
1457         struct qi_desc desc;
1458
1459         desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1460                         | QI_CC_GRAN(type) | QI_CC_TYPE;
1461         desc.qw1 = 0;
1462         desc.qw2 = 0;
1463         desc.qw3 = 0;
1464
1465         qi_submit_sync(iommu, &desc, 1, 0);
1466 }
1467
1468 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1469                     unsigned int size_order, u64 type)
1470 {
1471         u8 dw = 0, dr = 0;
1472
1473         struct qi_desc desc;
1474         int ih = 0;
1475
1476         if (cap_write_drain(iommu->cap))
1477                 dw = 1;
1478
1479         if (cap_read_drain(iommu->cap))
1480                 dr = 1;
1481
1482         desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1483                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1484         desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1485                 | QI_IOTLB_AM(size_order);
1486         desc.qw2 = 0;
1487         desc.qw3 = 0;
1488
1489         qi_submit_sync(iommu, &desc, 1, 0);
1490 }
1491
1492 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1493                         u16 qdep, u64 addr, unsigned mask)
1494 {
1495         struct qi_desc desc;
1496
1497         if (mask) {
1498                 addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1499                 desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1500         } else
1501                 desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
1502
1503         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1504                 qdep = 0;
1505
1506         desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1507                    QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
1508         desc.qw2 = 0;
1509         desc.qw3 = 0;
1510
1511         qi_submit_sync(iommu, &desc, 1, 0);
1512 }
1513
1514 /* PASID-based IOTLB invalidation */
1515 void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
1516                      unsigned long npages, bool ih)
1517 {
1518         struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
1519
1520         /*
1521          * npages == -1 requests a PASID-selective invalidation; a positive
1522          * value requests a Page-selective-within-PASID invalidation.
1523          * 0 is not a valid input.
1524          */
1525         if (WARN_ON(!npages)) {
1526                 pr_err("Invalid input npages = %ld\n", npages);
1527                 return;
1528         }
1529
1530         if (npages == -1) {
1531                 desc.qw0 = QI_EIOTLB_PASID(pasid) |
1532                                 QI_EIOTLB_DID(did) |
1533                                 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
1534                                 QI_EIOTLB_TYPE;
1535                 desc.qw1 = 0;
1536         } else {
1537                 int mask = ilog2(__roundup_pow_of_two(npages));
1538                 unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
1539
1540                 if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
1541                         addr = ALIGN_DOWN(addr, align);
1542
1543                 desc.qw0 = QI_EIOTLB_PASID(pasid) |
1544                                 QI_EIOTLB_DID(did) |
1545                                 QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
1546                                 QI_EIOTLB_TYPE;
1547                 desc.qw1 = QI_EIOTLB_ADDR(addr) |
1548                                 QI_EIOTLB_IH(ih) |
1549                                 QI_EIOTLB_AM(mask);
1550         }
1551
1552         qi_submit_sync(iommu, &desc, 1, 0);
1553 }
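
/*
 * Illustrative sketch (editor's addition, not part of the upstream driver):
 * the two valid ways of calling qi_flush_piotlb() described above.  The
 * domain id, PASID and IOVA values are made up for the example.
 */
static __maybe_unused void qi_flush_piotlb_example(struct intel_iommu *iommu)
{
        /* PASID-selective: drop every mapping tagged with PASID 5 in domain 1. */
        qi_flush_piotlb(iommu, 1, 5, 0, -1, false);

        /* Page-selective-within-PASID: drop 16 pages starting at IOVA 1MB. */
        qi_flush_piotlb(iommu, 1, 5, 0x100000, 16, false);
}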
1554
1555 /* PASID-based device IOTLB invalidation */
1556 void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1557                               u32 pasid,  u16 qdep, u64 addr, unsigned int size_order)
1558 {
1559         unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
1560         struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1561
1562         desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
1563                 QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
1564                 QI_DEV_IOTLB_PFSID(pfsid);
1565
1566         /*
1567          * If the S bit is 0, only a single page is flushed. If the S bit is
1568          * set, the least significant zero bit of the address indicates the
1569          * invalidation range. See VT-d spec 6.5.2.6:
1570          * e.g. a zero at address bit 12 indicates 8KB, at bit 13 it is 16KB.
1571          * size_order = 0 means a single 4KB page.
1572          * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
1573          * ECAP.
1574          */
1575         if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order))
1576                 pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
1577                                     addr, size_order);
1578
1579         /* Take page address */
1580         desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
1581
1582         if (size_order) {
1583                 /*
1584                  * Any 0 bits in the address below size_order would otherwise
1585                  * be taken as the least significant zero bit, so set them all
1586                  * to 1 to avoid encoding a smaller range than desired.
1587                  */
1588                 desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
1589                                         VTD_PAGE_SHIFT);
1590                 /* Clear the size_order bit so it is the lowest zero bit */
1591                 desc.qw1 &= ~mask;
1592                 /* Set the S bit to indicate flushing more than 1 page */
1593                 desc.qw1 |= QI_DEV_EIOTLB_SIZE;
1594         }
1595
1596         qi_submit_sync(iommu, &desc, 1, 0);
1597 }
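
/*
 * Worked example of the encoding above (editor's note, assuming 4KB pages):
 * with addr = 0x12340000 and size_order = 2 (16KB), the code first sets
 * bits 13:12 of the address field, then clears bit 13 (the size_order bit)
 * and sets the S bit.  The address field ends up with bit 12 = 1 and
 * bit 13 = 0, so the least significant zero bit is bit 13, which encodes a
 * 16KB invalidation range starting at 0x12340000.
 */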
1598
1599 void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
1600                           u64 granu, u32 pasid)
1601 {
1602         struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1603
1604         desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) |
1605                         QI_PC_GRAN(granu) | QI_PC_TYPE;
1606         qi_submit_sync(iommu, &desc, 1, 0);
1607 }
1608
1609 /*
1610  * Disable Queued Invalidation interface.
1611  */
1612 void dmar_disable_qi(struct intel_iommu *iommu)
1613 {
1614         unsigned long flags;
1615         u32 sts;
1616         cycles_t start_time = get_cycles();
1617
1618         if (!ecap_qis(iommu->ecap))
1619                 return;
1620
1621         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1622
1623         sts =  readl(iommu->reg + DMAR_GSTS_REG);
1624         if (!(sts & DMA_GSTS_QIES))
1625                 goto end;
1626
1627         /*
1628          * Give the hardware a chance to complete the pending invalidation requests.
1629          */
1630         while ((readl(iommu->reg + DMAR_IQT_REG) !=
1631                 readl(iommu->reg + DMAR_IQH_REG)) &&
1632                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1633                 cpu_relax();
1634
1635         iommu->gcmd &= ~DMA_GCMD_QIE;
1636         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1637
1638         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1639                       !(sts & DMA_GSTS_QIES), sts);
1640 end:
1641         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1642 }
1643
1644 /*
1645  * Enable queued invalidation.
1646  */
1647 static void __dmar_enable_qi(struct intel_iommu *iommu)
1648 {
1649         u32 sts;
1650         unsigned long flags;
1651         struct q_inval *qi = iommu->qi;
1652         u64 val = virt_to_phys(qi->desc);
1653
1654         qi->free_head = qi->free_tail = 0;
1655         qi->free_cnt = QI_LENGTH;
1656
1657         /*
1658          * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability
1659          * is present.
1660          */
1661         if (ecap_smts(iommu->ecap))
1662                 val |= (1 << 11) | 1;
1663
1664         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1665
1666         /* write zero to the tail reg */
1667         writel(0, iommu->reg + DMAR_IQT_REG);
1668
1669         dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
1670
1671         iommu->gcmd |= DMA_GCMD_QIE;
1672         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1673
1674         /* Make sure the hardware completes it */
1675         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1676
1677         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1678 }
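
/*
 * Editor's note on the (1 << 11) | 1 value above: in IQA_REG, bit 11 is the
 * Descriptor Width (DW) field, where 1 selects 256-bit descriptors, and
 * bits 2:0 are the Queue Size (QS) field, where 1 selects a two-page (8KB)
 * queue, enough for QI_LENGTH (256) scalable-mode descriptors of 32 bytes.
 */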
1679
1680 /*
1681  * Enable the Queued Invalidation interface. This is required to support
1682  * interrupt remapping. It is also used by DMA remapping, where it replaces
1683  * register-based IOTLB invalidation.
1684  */
1685 int dmar_enable_qi(struct intel_iommu *iommu)
1686 {
1687         struct q_inval *qi;
1688         struct page *desc_page;
1689
1690         if (!ecap_qis(iommu->ecap))
1691                 return -ENOENT;
1692
1693         /*
1694          * Queued invalidation is already set up and enabled.
1695          */
1696         if (iommu->qi)
1697                 return 0;
1698
1699         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1700         if (!iommu->qi)
1701                 return -ENOMEM;
1702
1703         qi = iommu->qi;
1704
1705         /*
1706          * Need two pages to accommodate 256 descriptors of 256 bits each
1707          * if the remapping hardware supports scalable mode translation.
1708          */
1709         desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
1710                                      !!ecap_smts(iommu->ecap));
1711         if (!desc_page) {
1712                 kfree(qi);
1713                 iommu->qi = NULL;
1714                 return -ENOMEM;
1715         }
1716
1717         qi->desc = page_address(desc_page);
1718
1719         qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
1720         if (!qi->desc_status) {
1721                 free_page((unsigned long) qi->desc);
1722                 kfree(qi);
1723                 iommu->qi = NULL;
1724                 return -ENOMEM;
1725         }
1726
1727         raw_spin_lock_init(&qi->q_lock);
1728
1729         __dmar_enable_qi(iommu);
1730
1731         return 0;
1732 }
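
/*
 * Editor's sizing note for the allocation above: the queue always holds
 * QI_LENGTH (256) descriptors.  Legacy descriptors are 128 bits (16 bytes),
 * so 256 * 16 = 4KB fits in one page (order 0); scalable-mode descriptors
 * are 256 bits (32 bytes), so 256 * 32 = 8KB needs two pages, hence the
 * allocation order of !!ecap_smts(iommu->ecap).
 */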
1733
1734 /* IOMMU interrupt handling. Most of it is MSI-like. */
1735
1736 enum faulttype {
1737         DMA_REMAP,
1738         INTR_REMAP,
1739         UNKNOWN,
1740 };
1741
1742 static const char *dma_remap_fault_reasons[] =
1743 {
1744         "Software",
1745         "Present bit in root entry is clear",
1746         "Present bit in context entry is clear",
1747         "Invalid context entry",
1748         "Access beyond MGAW",
1749         "PTE Write access is not set",
1750         "PTE Read access is not set",
1751         "Next page table ptr is invalid",
1752         "Root table address invalid",
1753         "Context table ptr is invalid",
1754         "non-zero reserved fields in RTP",
1755         "non-zero reserved fields in CTP",
1756         "non-zero reserved fields in PTE",
1757         "PCE for translation request specifies blocking",
1758 };
1759
1760 static const char * const dma_remap_sm_fault_reasons[] = {
1761         "SM: Invalid Root Table Address",
1762         "SM: TTM 0 for request with PASID",
1763         "SM: TTM 0 for page group request",
1764         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
1765         "SM: Error attempting to access Root Entry",
1766         "SM: Present bit in Root Entry is clear",
1767         "SM: Non-zero reserved field set in Root Entry",
1768         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
1769         "SM: Error attempting to access Context Entry",
1770         "SM: Present bit in Context Entry is clear",
1771         "SM: Non-zero reserved field set in the Context Entry",
1772         "SM: Invalid Context Entry",
1773         "SM: DTE field in Context Entry is clear",
1774         "SM: PASID Enable field in Context Entry is clear",
1775         "SM: PASID is larger than the max in Context Entry",
1776         "SM: PRE field in Context-Entry is clear",
1777         "SM: RID_PASID field error in Context-Entry",
1778         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
1779         "SM: Error attempting to access the PASID Directory Entry",
1780         "SM: Present bit in Directory Entry is clear",
1781         "SM: Non-zero reserved field set in PASID Directory Entry",
1782         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
1783         "SM: Error attempting to access PASID Table Entry",
1784         "SM: Present bit in PASID Table Entry is clear",
1785         "SM: Non-zero reserved field set in PASID Table Entry",
1786         "SM: Invalid Scalable-Mode PASID Table Entry",
1787         "SM: ERE field is clear in PASID Table Entry",
1788         "SM: SRE field is clear in PASID Table Entry",
1789         "Unknown", "Unknown",/* 0x5E-0x5F */
1790         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
1791         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
1792         "SM: Error attempting to access first-level paging entry",
1793         "SM: Present bit in first-level paging entry is clear",
1794         "SM: Non-zero reserved field set in first-level paging entry",
1795         "SM: Error attempting to access FL-PML4 entry",
1796         "SM: First-level entry address beyond MGAW in Nested translation",
1797         "SM: Read permission error in FL-PML4 entry in Nested translation",
1798         "SM: Read permission error in first-level paging entry in Nested translation",
1799         "SM: Write permission error in first-level paging entry in Nested translation",
1800         "SM: Error attempting to access second-level paging entry",
1801         "SM: Read/Write permission error in second-level paging entry",
1802         "SM: Non-zero reserved field set in second-level paging entry",
1803         "SM: Invalid second-level page table pointer",
1804         "SM: A/D bit update needed in second-level entry when set up in no snoop",
1805         "Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
1806         "SM: Address in first-level translation is not canonical",
1807         "SM: U/S set 0 for first-level translation with user privilege",
1808         "SM: No execute permission for request with PASID and ER=1",
1809         "SM: Address beyond the DMA hardware max",
1810         "SM: Second-level entry address beyond the max",
1811         "SM: No write permission for Write/AtomicOp request",
1812         "SM: No read permission for Read/AtomicOp request",
1813         "SM: Invalid address-interrupt address",
1814         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
1815         "SM: A/D bit update needed in first-level entry when set up in no snoop",
1816 };
1817
1818 static const char *irq_remap_fault_reasons[] =
1819 {
1820         "Detected reserved fields in the decoded interrupt-remapped request",
1821         "Interrupt index exceeded the interrupt-remapping table size",
1822         "Present field in the IRTE entry is clear",
1823         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1824         "Detected reserved fields in the IRTE entry",
1825         "Blocked a compatibility format interrupt request",
1826         "Blocked an interrupt request due to source-id verification failure",
1827 };
1828
1829 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1830 {
1831         if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1832                                         ARRAY_SIZE(irq_remap_fault_reasons))) {
1833                 *fault_type = INTR_REMAP;
1834                 return irq_remap_fault_reasons[fault_reason - 0x20];
1835         } else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
1836                         ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
1837                 *fault_type = DMA_REMAP;
1838                 return dma_remap_sm_fault_reasons[fault_reason - 0x30];
1839         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1840                 *fault_type = DMA_REMAP;
1841                 return dma_remap_fault_reasons[fault_reason];
1842         } else {
1843                 *fault_type = UNKNOWN;
1844                 return "Unknown";
1845         }
1846 }
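
/*
 * Editor's note on the code ranges used above: fault reasons below
 * ARRAY_SIZE(dma_remap_fault_reasons) index the legacy DMA-remapping table,
 * reasons from 0x20 index irq_remap_fault_reasons[], reasons from 0x30
 * index dma_remap_sm_fault_reasons[], and anything outside those ranges is
 * reported as "Unknown".
 */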
1847
1848
1849 static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
1850 {
1851         if (iommu->irq == irq)
1852                 return DMAR_FECTL_REG;
1853         else if (iommu->pr_irq == irq)
1854                 return DMAR_PECTL_REG;
1855         else
1856                 BUG();
1857 }
1858
1859 void dmar_msi_unmask(struct irq_data *data)
1860 {
1861         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1862         int reg = dmar_msi_reg(iommu, data->irq);
1863         unsigned long flag;
1864
1865         /* unmask it */
1866         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1867         writel(0, iommu->reg + reg);
1868         /* Read back the register to force-flush the posted write */
1869         readl(iommu->reg + reg);
1870         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1871 }
1872
1873 void dmar_msi_mask(struct irq_data *data)
1874 {
1875         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1876         int reg = dmar_msi_reg(iommu, data->irq);
1877         unsigned long flag;
1878
1879         /* mask it */
1880         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1881         writel(DMA_FECTL_IM, iommu->reg + reg);
1882         /* Read back the register to force-flush the posted write */
1883         readl(iommu->reg + reg);
1884         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1885 }
1886
1887 void dmar_msi_write(int irq, struct msi_msg *msg)
1888 {
1889         struct intel_iommu *iommu = irq_get_handler_data(irq);
1890         int reg = dmar_msi_reg(iommu, irq);
1891         unsigned long flag;
1892
1893         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1894         writel(msg->data, iommu->reg + reg + 4);
1895         writel(msg->address_lo, iommu->reg + reg + 8);
1896         writel(msg->address_hi, iommu->reg + reg + 12);
1897         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1898 }
1899
1900 void dmar_msi_read(int irq, struct msi_msg *msg)
1901 {
1902         struct intel_iommu *iommu = irq_get_handler_data(irq);
1903         int reg = dmar_msi_reg(iommu, irq);
1904         unsigned long flag;
1905
1906         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1907         msg->data = readl(iommu->reg + reg + 4);
1908         msg->address_lo = readl(iommu->reg + reg + 8);
1909         msg->address_hi = readl(iommu->reg + reg + 12);
1910         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1911 }
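
/*
 * Editor's note: the +4/+8/+12 offsets in dmar_msi_write() and
 * dmar_msi_read() rely on the VT-d register layout, where the data,
 * address and upper-address registers immediately follow the control
 * register (e.g. DMAR_FEDATA_REG, DMAR_FEADDR_REG and DMAR_FEUADDR_REG
 * after DMAR_FECTL_REG).
 */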
1912
1913 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1914                 u8 fault_reason, u32 pasid, u16 source_id,
1915                 unsigned long long addr)
1916 {
1917         const char *reason;
1918         int fault_type;
1919
1920         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1921
1922         if (fault_type == INTR_REMAP) {
1923                 pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index 0x%llx [fault reason 0x%02x] %s\n",
1924                        source_id >> 8, PCI_SLOT(source_id & 0xFF),
1925                        PCI_FUNC(source_id & 0xFF), addr >> 48,
1926                        fault_reason, reason);
1927
1928                 return 0;
1929         }
1930
1931         if (pasid == INVALID_IOASID)
1932                 pr_err("[%s NO_PASID] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
1933                        type ? "DMA Read" : "DMA Write",
1934                        source_id >> 8, PCI_SLOT(source_id & 0xFF),
1935                        PCI_FUNC(source_id & 0xFF), addr,
1936                        fault_reason, reason);
1937         else
1938                 pr_err("[%s PASID 0x%x] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
1939                        type ? "DMA Read" : "DMA Write", pasid,
1940                        source_id >> 8, PCI_SLOT(source_id & 0xFF),
1941                        PCI_FUNC(source_id & 0xFF), addr,
1942                        fault_reason, reason);
1943
1944         dmar_fault_dump_ptes(iommu, source_id, addr, pasid);
1945
1946         return 0;
1947 }
1948
1949 #define PRIMARY_FAULT_REG_LEN (16)
1950 irqreturn_t dmar_fault(int irq, void *dev_id)
1951 {
1952         struct intel_iommu *iommu = dev_id;
1953         int reg, fault_index;
1954         u32 fault_status;
1955         unsigned long flag;
1956         static DEFINE_RATELIMIT_STATE(rs,
1957                                       DEFAULT_RATELIMIT_INTERVAL,
1958                                       DEFAULT_RATELIMIT_BURST);
1959
1960         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1961         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1962         if (fault_status && __ratelimit(&rs))
1963                 pr_err("DRHD: handling fault status reg %x\n", fault_status);
1964
1965         /* TBD: ignore advanced fault log currently */
1966         if (!(fault_status & DMA_FSTS_PPF))
1967                 goto unlock_exit;
1968
1969         fault_index = dma_fsts_fault_record_index(fault_status);
1970         reg = cap_fault_reg_offset(iommu->cap);
1971         while (1) {
1972                 /* Disable printing, simply clear the fault when ratelimited */
1973                 bool ratelimited = !__ratelimit(&rs);
1974                 u8 fault_reason;
1975                 u16 source_id;
1976                 u64 guest_addr;
1977                 u32 pasid;
1978                 int type;
1979                 u32 data;
1980                 bool pasid_present;
1981
1982                 /* highest 32 bits */
1983                 data = readl(iommu->reg + reg +
1984                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1985                 if (!(data & DMA_FRCD_F))
1986                         break;
1987
1988                 if (!ratelimited) {
1989                         fault_reason = dma_frcd_fault_reason(data);
1990                         type = dma_frcd_type(data);
1991
1992                         pasid = dma_frcd_pasid_value(data);
1993                         data = readl(iommu->reg + reg +
1994                                      fault_index * PRIMARY_FAULT_REG_LEN + 8);
1995                         source_id = dma_frcd_source_id(data);
1996
1997                         pasid_present = dma_frcd_pasid_present(data);
1998                         guest_addr = dmar_readq(iommu->reg + reg +
1999                                         fault_index * PRIMARY_FAULT_REG_LEN);
2000                         guest_addr = dma_frcd_page_addr(guest_addr);
2001                 }
2002
2003                 /* clear the fault */
2004                 writel(DMA_FRCD_F, iommu->reg + reg +
2005                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
2006
2007                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
2008
2009                 if (!ratelimited)
2010                         /* Use PASID value -1 (INVALID_IOASID) if no PASID is present */
2011                         dmar_fault_do_one(iommu, type, fault_reason,
2012                                           pasid_present ? pasid : INVALID_IOASID,
2013                                           source_id, guest_addr);
2014
2015                 fault_index++;
2016                 if (fault_index >= cap_num_fault_regs(iommu->cap))
2017                         fault_index = 0;
2018                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
2019         }
2020
2021         writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
2022                iommu->reg + DMAR_FSTS_REG);
2023
2024 unlock_exit:
2025         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
2026         return IRQ_HANDLED;
2027 }
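
/*
 * Editor's note on the fault record layout parsed above: each primary fault
 * recording register is PRIMARY_FAULT_REG_LEN (16) bytes.  The low 64 bits
 * hold the faulting page address, the dword at offset 8 carries the
 * source-id and the PASID-present bit, and the dword at offset 12 carries
 * the fault reason, request type, PASID value and the F bit that is written
 * back to clear the record.
 */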
2028
2029 int dmar_set_interrupt(struct intel_iommu *iommu)
2030 {
2031         int irq, ret;
2032
2033         /*
2034          * Check if the fault interrupt is already initialized.
2035          */
2036         if (iommu->irq)
2037                 return 0;
2038
2039         irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
2040         if (irq > 0) {
2041                 iommu->irq = irq;
2042         } else {
2043                 pr_err("No free IRQ vectors\n");
2044                 return -EINVAL;
2045         }
2046
2047         ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
2048         if (ret)
2049                 pr_err("Can't request irq\n");
2050         return ret;
2051 }
2052
2053 int __init enable_drhd_fault_handling(void)
2054 {
2055         struct dmar_drhd_unit *drhd;
2056         struct intel_iommu *iommu;
2057
2058         /*
2059          * Enable fault control interrupt.
2060          */
2061         for_each_iommu(iommu, drhd) {
2062                 u32 fault_status;
2063                 int ret = dmar_set_interrupt(iommu);
2064
2065                 if (ret) {
2066                         pr_err("DRHD %Lx: failed to enable fault interrupt, ret %d\n",
2067                                (unsigned long long)drhd->reg_base_addr, ret);
2068                         return -1;
2069                 }
2070
2071                 /*
2072                  * Clear any previous faults.
2073                  */
2074                 dmar_fault(iommu->irq, iommu);
2075                 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
2076                 writel(fault_status, iommu->reg + DMAR_FSTS_REG);
2077         }
2078
2079         return 0;
2080 }
2081
2082 /*
2083  * Re-enable Queued Invalidation interface.
2084  */
2085 int dmar_reenable_qi(struct intel_iommu *iommu)
2086 {
2087         if (!ecap_qis(iommu->ecap))
2088                 return -ENOENT;
2089
2090         if (!iommu->qi)
2091                 return -ENOENT;
2092
2093         /*
2094          * First disable queued invalidation.
2095          */
2096         dmar_disable_qi(iommu);
2097         /*
2098          * Then enable queued invalidation again. Since there are no pending
2099          * invalidation requests now, it's safe to re-enable queued
2100          * invalidation.
2101          */
2102         __dmar_enable_qi(iommu);
2103
2104         return 0;
2105 }
2106
2107 /*
2108  * Check interrupt remapping support in DMAR table description.
2109  */
2110 int __init dmar_ir_support(void)
2111 {
2112         struct acpi_table_dmar *dmar;
2113         dmar = (struct acpi_table_dmar *)dmar_tbl;
2114         if (!dmar)
2115                 return 0;
2116         return dmar->flags & 0x1;
2117 }
2118
2119 /* Check whether DMAR units are in use */
2120 static inline bool dmar_in_use(void)
2121 {
2122         return irq_remapping_enabled || intel_iommu_enabled;
2123 }
2124
2125 static int __init dmar_free_unused_resources(void)
2126 {
2127         struct dmar_drhd_unit *dmaru, *dmaru_n;
2128
2129         if (dmar_in_use())
2130                 return 0;
2131
2132         if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
2133                 bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
2134
2135         down_write(&dmar_global_lock);
2136         list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
2137                 list_del(&dmaru->list);
2138                 dmar_free_drhd(dmaru);
2139         }
2140         up_write(&dmar_global_lock);
2141
2142         return 0;
2143 }
2144
2145 late_initcall(dmar_free_unused_resources);
2146
2147 /*
2148  * DMAR Hotplug Support
2149  * For more details, please refer to Intel(R) Virtualization Technology
2150  * for Directed-IO Architecture Specification, Rev 2.2, Section 8.8
2151  * "Remapping Hardware Unit Hot Plug".
2152  */
2153 static guid_t dmar_hp_guid =
2154         GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
2155                   0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
2156
2157 /*
2158  * Currently there's only one revision and BIOS will not check the revision id,
2159  * so use 0 for safety.
2160  */
2161 #define DMAR_DSM_REV_ID                 0
2162 #define DMAR_DSM_FUNC_DRHD              1
2163 #define DMAR_DSM_FUNC_ATSR              2
2164 #define DMAR_DSM_FUNC_RHSA              3
2165 #define DMAR_DSM_FUNC_SATC              4
2166
2167 static inline bool dmar_detect_dsm(acpi_handle handle, int func)
2168 {
2169         return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
2170 }
2171
2172 static int dmar_walk_dsm_resource(acpi_handle handle, int func,
2173                                   dmar_res_handler_t handler, void *arg)
2174 {
2175         int ret = -ENODEV;
2176         union acpi_object *obj;
2177         struct acpi_dmar_header *start;
2178         struct dmar_res_callback callback;
2179         static int res_type[] = {
2180                 [DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
2181                 [DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
2182                 [DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
2183                 [DMAR_DSM_FUNC_SATC] = ACPI_DMAR_TYPE_SATC,
2184         };
2185
2186         if (!dmar_detect_dsm(handle, func))
2187                 return 0;
2188
2189         obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
2190                                       func, NULL, ACPI_TYPE_BUFFER);
2191         if (!obj)
2192                 return -ENODEV;
2193
2194         memset(&callback, 0, sizeof(callback));
2195         callback.cb[res_type[func]] = handler;
2196         callback.arg[res_type[func]] = arg;
2197         start = (struct acpi_dmar_header *)obj->buffer.pointer;
2198         ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);
2199
2200         ACPI_FREE(obj);
2201
2202         return ret;
2203 }
2204
2205 static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
2206 {
2207         int ret;
2208         struct dmar_drhd_unit *dmaru;
2209
2210         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2211         if (!dmaru)
2212                 return -ENODEV;
2213
2214         ret = dmar_ir_hotplug(dmaru, true);
2215         if (ret == 0)
2216                 ret = dmar_iommu_hotplug(dmaru, true);
2217
2218         return ret;
2219 }
2220
2221 static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
2222 {
2223         int i, ret;
2224         struct device *dev;
2225         struct dmar_drhd_unit *dmaru;
2226
2227         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2228         if (!dmaru)
2229                 return 0;
2230
2231         /*
2232          * All PCI devices managed by this unit should have been destroyed.
2233          */
2234         if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
2235                 for_each_active_dev_scope(dmaru->devices,
2236                                           dmaru->devices_cnt, i, dev)
2237                         return -EBUSY;
2238         }
2239
2240         ret = dmar_ir_hotplug(dmaru, false);
2241         if (ret == 0)
2242                 ret = dmar_iommu_hotplug(dmaru, false);
2243
2244         return ret;
2245 }
2246
2247 static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
2248 {
2249         struct dmar_drhd_unit *dmaru;
2250
2251         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2252         if (dmaru) {
2253                 list_del_rcu(&dmaru->list);
2254                 synchronize_rcu();
2255                 dmar_free_drhd(dmaru);
2256         }
2257
2258         return 0;
2259 }
2260
2261 static int dmar_hotplug_insert(acpi_handle handle)
2262 {
2263         int ret;
2264         int drhd_count = 0;
2265
2266         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2267                                      &dmar_validate_one_drhd, (void *)1);
2268         if (ret)
2269                 goto out;
2270
2271         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2272                                      &dmar_parse_one_drhd, (void *)&drhd_count);
2273         if (ret == 0 && drhd_count == 0) {
2274                 pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
2275                 goto out;
2276         } else if (ret) {
2277                 goto release_drhd;
2278         }
2279
2280         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
2281                                      &dmar_parse_one_rhsa, NULL);
2282         if (ret)
2283                 goto release_drhd;
2284
2285         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2286                                      &dmar_parse_one_atsr, NULL);
2287         if (ret)
2288                 goto release_atsr;
2289
2290         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2291                                      &dmar_hp_add_drhd, NULL);
2292         if (!ret)
2293                 return 0;
2294
2295         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2296                                &dmar_hp_remove_drhd, NULL);
2297 release_atsr:
2298         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2299                                &dmar_release_one_atsr, NULL);
2300 release_drhd:
2301         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2302                                &dmar_hp_release_drhd, NULL);
2303 out:
2304         return ret;
2305 }
2306
2307 static int dmar_hotplug_remove(acpi_handle handle)
2308 {
2309         int ret;
2310
2311         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2312                                      &dmar_check_one_atsr, NULL);
2313         if (ret)
2314                 return ret;
2315
2316         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2317                                      &dmar_hp_remove_drhd, NULL);
2318         if (ret == 0) {
2319                 WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2320                                                &dmar_release_one_atsr, NULL));
2321                 WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2322                                                &dmar_hp_release_drhd, NULL));
2323         } else {
2324                 dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2325                                        &dmar_hp_add_drhd, NULL);
2326         }
2327
2328         return ret;
2329 }
2330
2331 static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
2332                                        void *context, void **retval)
2333 {
2334         acpi_handle *phdl = retval;
2335
2336         if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2337                 *phdl = handle;
2338                 return AE_CTRL_TERMINATE;
2339         }
2340
2341         return AE_OK;
2342 }
2343
2344 static int dmar_device_hotplug(acpi_handle handle, bool insert)
2345 {
2346         int ret;
2347         acpi_handle tmp = NULL;
2348         acpi_status status;
2349
2350         if (!dmar_in_use())
2351                 return 0;
2352
2353         if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2354                 tmp = handle;
2355         } else {
2356                 status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
2357                                              ACPI_UINT32_MAX,
2358                                              dmar_get_dsm_handle,
2359                                              NULL, NULL, &tmp);
2360                 if (ACPI_FAILURE(status)) {
2361                         pr_warn("Failed to locate _DSM method.\n");
2362                         return -ENXIO;
2363                 }
2364         }
2365         if (tmp == NULL)
2366                 return 0;
2367
2368         down_write(&dmar_global_lock);
2369         if (insert)
2370                 ret = dmar_hotplug_insert(tmp);
2371         else
2372                 ret = dmar_hotplug_remove(tmp);
2373         up_write(&dmar_global_lock);
2374
2375         return ret;
2376 }
2377
2378 int dmar_device_add(acpi_handle handle)
2379 {
2380         return dmar_device_hotplug(handle, true);
2381 }
2382
2383 int dmar_device_remove(acpi_handle handle)
2384 {
2385         return dmar_device_hotplug(handle, false);
2386 }
2387
2388 /*
2389  * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
2390  *
2391  * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
2392  * the ACPI DMAR table. This means that the platform boot firmware has made
2393  * sure no device can issue DMA outside of RMRR regions.
2394  */
2395 bool dmar_platform_optin(void)
2396 {
2397         struct acpi_table_dmar *dmar;
2398         acpi_status status;
2399         bool ret;
2400
2401         status = acpi_get_table(ACPI_SIG_DMAR, 0,
2402                                 (struct acpi_table_header **)&dmar);
2403         if (ACPI_FAILURE(status))
2404                 return false;
2405
2406         ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
2407         acpi_put_table((struct acpi_table_header *)dmar);
2408
2409         return ret;
2410 }
2411 EXPORT_SYMBOL_GPL(dmar_platform_optin);