1/*
2 * Copyright (c) 2014, University of Washington.
3 * All rights reserved.
4 *
5 * This file is distributed under the terms in the attached LICENSE file.
6 * If you do not find this file, copies can be found by writing to:
7 * ETH Zurich D-INFK, CAB F.78, Universitaetstrasse 6, CH-8092 Zurich.
8 * Attn: Systems Group.
9 */
10
11#include <barrelfish/barrelfish.h>
12#include <barrelfish/invocations_arch.h>
13#include <barrelfish/sys_debug.h>
14#include <acpi.h>
15#include <mm/mm.h>
16#include <skb/skb.h>
17#include <pci/confspace/pci_confspace.h>
18
19#include "intel_vtd.h"
20#include "vtd_debug.h"
21
22extern struct mm pci_mm_physaddr;
23static bool vtd_enabled = false;
24
25// cache coherency for frame mappings
26static bool cache_coherence = false;
27static vregion_flags_t vtd_map_attr;
28
29static struct vtd_unit *vtd_units = NULL;
30static struct vtd_domain_list *domains = NULL;
31
32static struct vtd_domain *identity_domain = NULL;
33static lvaddr_t *id_pagetable_vbase = NULL;
34
#ifdef VTD_DEBUG_
// Debug helper: pretty-prints the IOTLB and main register sets of a
// remapping hardware unit to stdout.
static void vtd_dump_registers(struct vtd_unit *unit)
{
    size_t size = 1 << 13;  // 8 KiB text buffer for the register dump
    char *buffer = malloc(size);
    assert(buffer != NULL);
    // NOTE(review): both pr calls target the same buffer, which is printed
    // only once at the end — if these Mackerel-generated functions write
    // from buffer[0], vtd_pr overwrites the IOTLB dump; confirm intended.
    vtd_iotlb_pr(buffer, size, unit->iotlb_regs);
    vtd_pr(buffer, size, unit->regset);
    puts(buffer);
    free(buffer);
}
#endif
47
#ifdef VTD_DEBUG_
// Debug helper: prints the domain list as H->[did]->[did]...<-T, marking
// the head (H) and tail (T) of the list.
static void dump_domains(struct vtd_domain_list *lst)
{
    VTD_DEBUG("Printing the list of domains:\n");

    if (domain_list_empty(lst)) {
        printf("No domains\n");
        return;
    }

    struct vtd_domain *dom;
    VTD_FOR_EACH(dom, lst->head) {
        if (dom == lst->head) {
            printf("H");
        }
        printf("->[%d]", dom->did);
        if (dom == lst->tail) {
            printf("<-T");
        }
    }
    printf("\n");
}
#endif
66
67// Creates the root-table and context-tables for unit.
68static void vtd_create_tables(struct vtd_unit *unit)
69{
70    assert(unit != NULL);
71    errval_t err;
72    err = frame_alloc(&(unit->rt_frame), BASE_PAGE_SIZE, NULL);
73    assert(err_is_ok(err));
74
75    err = vspace_map_one_frame_attr((void **)&(unit->root_table), BASE_PAGE_BITS,
76				    unit->rt_frame, vtd_map_attr, NULL, NULL);
77    assert(err_is_ok(err));
78
79    for (int i = 0; i < NUM_ROOT_ENTRIES; i++) {
80        err = frame_alloc(&(unit->ct_frame_caps[i]), BASE_PAGE_SIZE, NULL);
81	assert(err_is_ok(err));
82
83	err = vspace_map_one_frame_attr((void **)&(unit->context_tables[i]), BASE_PAGE_BITS,
84					unit->ct_frame_caps[i], vtd_map_attr, NULL, NULL);
85	assert(err_is_ok(err));
86    }
87}
88
89// Maps the remapping hardware memory-mapped registers of a unit to vspace.
90static void *vtd_map_registers(genpaddr_t regset_base)
91{
92    errval_t err;
93    void *regset_vbase;
94    struct capref regset_frame, regset_devframe;
95    struct frame_identity regset_frame_id;
96
97    err = mm_alloc_range(&pci_mm_physaddr, BASE_PAGE_BITS, regset_base,
98			 regset_base + BASE_PAGE_SIZE, &regset_frame, NULL);
99    assert(err_is_ok(err));
100
101    err = frame_identify(regset_frame, &regset_frame_id);
102    assert(err_is_ok(err));
103
104    err = devframe_type(&regset_devframe, regset_frame, BASE_PAGE_BITS);
105    assert(err_is_ok(err));
106    assert(regset_base == regset_frame_id.base);
107
108    err = vspace_map_one_frame_attr(&regset_vbase, BASE_PAGE_SIZE, regset_devframe,
109				    vtd_map_attr, NULL, NULL);
110    assert(err_is_ok(err));
111    return regset_vbase;
112}
113
// Programs the physical address of unit's root-table into the RTADDR
// register, makes the hardware adopt it, and flushes the translation
// caches. Must be called before translation is enabled for the unit.
static void vtd_set_root_table(struct vtd_unit *unit)
{
    assert(unit != NULL);
    errval_t err;
    genpaddr_t rt_base;
    struct frame_identity rt_frame_id;
    err = frame_identify(unit->rt_frame, &rt_frame_id);
    assert(err_is_ok(err));
    rt_base = rt_frame_id.base;
    // The root-table must be page-aligned: only bits >= 12 are programmed.
    assert((rt_base & BASE_PAGE_MASK) == 0);
    // Set the root-table type
    vtd_RTADDR_rtt_wrf(unit->regset, ROOT_TABLE_TYPE);
    // Set the physical address of the root-table
    vtd_RTADDR_rta_wrf(unit->regset, (rt_base >> BASE_PAGE_BITS));
    // Update the root-table pointer and wait for the hardware to latch it
    GSTS_srtp_wait(unit);
    // Globally invalidate the context-cache and then globally invalidate
    // the IOTLB (only in this order).
    vtd_context_cache_glob_inval(unit);
    vtd_iotlb_glob_inval(unit);
}
135
136// Creates and initializes a structure for a remapping hardware unit
137// with its register set at regset_vbase.
138//
139// We will have to change this as it is possible for multiple hardware
140// units may reside on the same segment.
141static struct vtd_unit *vtd_create_unit(void *regset_vbase, uint16_t segment)
142{
143    int iro;
144    struct vtd_unit *new_unit;
145    void *iotlb_regs_vbase;
146
147    new_unit = (struct vtd_unit *)malloc(sizeof(struct vtd_unit));
148    assert(new_unit != NULL);
149
150    new_unit->regset = malloc(sizeof(*(new_unit->regset)));
151    assert(new_unit->regset != NULL);
152    vtd_initialize(new_unit->regset, regset_vbase);
153
154    iro = vtd_ECAP_iro_rdf(new_unit->regset);
155    iotlb_regs_vbase = (void *)((uint8_t *)regset_vbase + iro);
156
157    new_unit->iotlb_regs = malloc(sizeof(*(new_unit->iotlb_regs)));
158    assert(new_unit->iotlb_regs != NULL);
159    vtd_iotlb_initialize(new_unit->iotlb_regs, iotlb_regs_vbase);
160
161    new_unit->pci_seg = segment;
162    new_unit->next = NULL;
163
164    // Create root table and context tables for the new remapping unit
165    vtd_create_tables(new_unit);
166    return new_unit;
167}
168
169static void vtd_insert_context_tables(struct vtd_unit *unit)
170{
171    assert(unit != NULL);
172    assert(vtd_CAP_cm_rdf(unit->regset) == 0);
173
174    errval_t err;
175    struct frame_identity ct_id;
176    for (int i = 0; i < NUM_ROOT_ENTRIES; i++) {
177        err = frame_identify(unit->ct_frame_caps[i], &ct_id);
178        assert(err_is_ok(err));
179	assert((ct_id.base & BASE_PAGE_MASK) == 0);
180	vtd_root_entry_ctp_insert(unit->root_table[i], (ct_id.base >> BASE_PAGE_BITS));
181	vtd_root_entry_p_insert(unit->root_table[i], 1);
182    }
183}
184
185// If the cap is a capability for an x86-64 PML4 VNode we return
186// the base physical address of it. If not, we return 0.
187static inline genpaddr_t pml4_base(struct capref cap)
188{
189    errval_t err;
190    struct vnode_identity pml4_id;
191    err = invoke_vnode_identify(cap, &pml4_id);
192    assert(err_is_ok(err));
193    genpaddr_t pt = pml4_id.base;
194    if (pml4_id.type != ObjType_VNode_x86_64_pml4) {
195        return 0;
196    }
197
198    // If the cap is for the identity domain, we return the physical
199    // address of the identity pagetable, which has a PML4 table created
200    // from a frame capability, instead of the base address of the empty
201    // PML4 VNode.
202    pml4_id.base = 0;
203    pml4_id.type = 0;
204    err = invoke_vnode_identify(identity_domain->pml4, &pml4_id);
205    assert(err_is_ok(err));
206    if (pml4_id.base == pt) {
207        pt = identity_domain->pt_gp;
208    }
209    return pt;
210}
211
212// Removes a device from the domain specified by pml4.
213errval_t vtd_domain_remove_device(int seg, int bus, int dev, int func, struct capref pml4)
214{
215    if (!vtd_enabled) return VTD_ERR_NOT_ENABLED;
216    if (vtd_no_units(vtd_units)) return VTD_ERR_NO_UNITS;
217    if (!valid_device(bus, dev, func)) return VTD_ERR_DEV_NOT_FOUND;
218
219    genpaddr_t pt = pml4_base(pml4);
220    if (pt == 0) return VTD_ERR_INVALID_CAP;
221
222    // Find the domain in the list of domains
223    struct vtd_domain *dom = NULL;
224    VTD_FOR_EACH(dom, domains->head) {
225        if (dom->pt_gp == pt) break;
226    }
227
228    if (dom == NULL) return VTD_ERR_DOM_NOT_FOUND;
229
230    errval_t err =  VTD_ERR_DEV_NOT_FOUND;
231
232    // Find the unit containing the device under its scope
233    struct vtd_unit *u = NULL;
234    VTD_FOR_EACH(u, dom->units) {
235        if (u->pci_seg == seg) {
236            vtd_ctxt_entry_array_t *context_table = u->context_tables[bus];
237            uint8_t id = (dev << 3) | func;
238
239            // The device doesn't belong to this domain
240            if (!vtd_ctxt_entry_p_extract(context_table[id])) {
241                return VTD_ERR_DEV_NOT_FOUND;
242            }
243
244            vtd_ctxt_entry_p_insert(context_table[id], 0);
245            vtd_ctxt_entry_t_insert(context_table[id], 0);
246            vtd_ctxt_entry_slptptr_insert(context_table[id], 0);
247            vtd_ctxt_entry_did_insert(context_table[id], 0);
248            vtd_ctxt_entry_aw_insert(context_table[id], 0);
249
250            // After removing the devices, we perform a context-cache device-selective
251            // invalidation followed by an IOTLB domain-selective invalidation.
252            int sid = (bus << 16) | id;
253            vtd_context_cache_dev_inval(dom, sid, vtd_nomask);
254            vtd_iotlb_dom_inval(dom);
255
256            err = SYS_ERR_OK;
257        }
258    }
259
260    return err;
261}
262
// Adds a device to the domain specified by pml4.
//
// seg, bus, dev, func identify the device; pml4 must be the PML4
// capability of a domain previously created with vtd_create_domain.
// Returns VTD_ERR_DEV_USED if the device already belongs to another
// domain (a device in the identity domain is silently moved out first).
errval_t vtd_domain_add_device(int seg, int bus, int dev, int func, struct capref pml4)
{
    errval_t err;
    if (!vtd_enabled) return VTD_ERR_NOT_ENABLED;
    if (vtd_no_units(vtd_units)) return VTD_ERR_NO_UNITS;
    if (!valid_device(bus, dev, func)) return VTD_ERR_DEV_NOT_FOUND;

    genpaddr_t pt = pml4_base(pml4);
    if (pt == 0) return VTD_ERR_INVALID_CAP;

    // Find the domain with the provided pml4 capability
    struct vtd_domain *dom = NULL;
    VTD_FOR_EACH(dom, domains->head) {
        if (dom->pt_gp == pt) break;
    }

    if (dom == NULL) return VTD_ERR_DOM_NOT_FOUND;

    // Find the unit containing the device under its scope
    err = VTD_ERR_DEV_NOT_FOUND;

    struct vtd_unit *u = NULL;
    VTD_FOR_EACH(u, dom->units) {
        if (u->pci_seg == seg) {

            // Context-table index: device number in bits 7:3, function in 2:0.
            vtd_ctxt_entry_array_t *context_table = u->context_tables[bus];
            uint8_t id = (dev << 3) | func;

            // When a request is made for a device, if it belongs to the identity domain,
            // we remove it before adding it to the domain specified by pml4
            if (vtd_ctxt_entry_p_extract(context_table[id])) {
                int did = vtd_ctxt_entry_did_extract(context_table[id]);
                if (did == identity_domain->did && (pt != identity_domain->pt_gp)) {
                    err = vtd_domain_remove_device(seg, bus, dev, func, identity_domain->pml4);
                    assert(err_is_ok(err));
                } else {
                    return VTD_ERR_DEV_USED;
                }
            }

            // If device-TLBs are supported, allow translated and translation requests
            if (vtd_ECAP_dt_rdf(u->regset)) {
                vtd_ctxt_entry_t_insert(context_table[id], vtd_hme);
            }
            vtd_ctxt_entry_aw_insert(context_table[id], vtd_agaw48);
            vtd_ctxt_entry_did_insert(context_table[id], dom->did);

            // Flush before setting the present bit: the hardware may read
            // the entry from memory as soon as it appears present.
            sys_debug_flush_cache();

            vtd_ctxt_entry_slptptr_insert(context_table[id], (pt >> 12));
            vtd_ctxt_entry_p_insert(context_table[id], 1);

            err = SYS_ERR_OK;
        }
    }

    return err;
}
322
323// Determines the minimum and maximum domain-ids among all
324// hardware units.
325static void vtd_create_did_bounds(struct vtd_unit *head, struct vtd_domain_list *doms)
326{
327    assert(head != NULL);
328    struct vtd_unit *u = head;
329    doms->min_did = vtd_CAP_cm_rdf(u->regset);
330    doms->max_did = vtd_number_domains_supported(u)-1;
331    VTD_FOR_EACH(u, head) {
332        doms->min_did = MAX(doms->min_did, vtd_CAP_cm_rdf(u->regset));
333        doms->max_did = MIN(doms->max_did, vtd_number_domains_supported(u)-1);
334    }
335}
336
// Creates a new domain for an application using a capability to its root PML4.
// A free domain-id is found by scanning the did-ordered domain list for
// the first gap starting at min_did.
errval_t vtd_create_domain(struct capref pml4)
{
    if (!vtd_enabled) return VTD_ERR_NOT_ENABLED;
    if (vtd_no_units(vtd_units)) return VTD_ERR_NO_UNITS;

    // Check that pml4 is a capability for a x86-64 PML4 VNode
    errval_t err;
    struct vnode_identity pml4_id;
    err = invoke_vnode_identify(pml4, &pml4_id);
    assert(err_is_ok(err));
    genpaddr_t pt = pml4_id.base;
    if (pml4_id.type != ObjType_VNode_x86_64_pml4) return VTD_ERR_INVALID_CAP;
    int did = domains->min_did;

    // Find a domain-id for the new domain: the list is ordered by did, so
    // the first point where did < d->did is an unused id, and d is the
    // element to insert before.
    struct vtd_domain *d = NULL;
    VTD_FOR_EACH(d, domains->head) {
        if (did < d->did) break;
	did++;
    }

    // All domain-ids have been exhausted. Return an error.
    if (did > domains->max_did) return VTD_ERR_FULL;

    VTD_DEBUG("Creating domain with pt = %"PRIu64", did = %d\n", pt, did);
#ifdef VTD_DEBUG_
    dump_domains(domains);
#endif

    struct vtd_domain *new_domain = vtd_new_domain(did, pt, pml4, vtd_units);
    vtd_insert_domain(new_domain, d, domains);

    return SYS_ERR_OK;
}
372
373// Removes a domain for an application with the specified root PML4.
374errval_t vtd_remove_domain(struct capref pml4)
375{
376    if (!vtd_enabled) return VTD_ERR_NOT_ENABLED;
377    if (vtd_no_units(vtd_units)) return VTD_ERR_NO_UNITS;
378
379    // Check that pml4 is a capability for a x86-64 PML4 VNode
380    errval_t err;
381    struct vnode_identity pml4_id;
382    err = invoke_vnode_identify(pml4, &pml4_id);
383    assert(err_is_ok(err));
384    genpaddr_t pt = pml4_id.base;
385    if (pml4_id.type != ObjType_VNode_x86_64_pml4) return VTD_ERR_INVALID_CAP;
386
387    // empty
388    if (domain_list_empty(domains)) return VTD_ERR_DOM_NOT_FOUND;
389
390    struct vtd_domain *d = NULL;
391    VTD_FOR_EACH(d, domains->head) {
392        if (d->pt_gp == pt) {
393	    vtd_delete_domain(d, domains);
394	    return SYS_ERR_OK;
395	}
396    }
397    return VTD_ERR_DOM_NOT_FOUND;
398}
399
// Establishes a mapping from va to pa in pt, a second-level
// pagetable structure. Returns the number of bytes mapped, i.e. the leaf
// page size implied by the number of levels.
//
// Tables use a shadow scheme: slots [0, PTABLE_SIZE) hold the hardware
// entries, and slot [e + PTABLE_SIZE] holds the virtual address of the
// next-level table, so the walk can descend without re-mapping frames.
static inline uint64_t vtd_map(uint64_t va, uint64_t pa, lvaddr_t *pt, int levels)
{
    struct capref pe_frame;
    struct frame_identity pe_id;
    lvaddr_t *vtp = pt;

    int e = 0;
    for (int current_level = 1; current_level <= levels; current_level++) {
        // Index into the table at this level for virtual address va.
        switch (current_level) {
            case 1:
                e = X86_64_PML4_BASE(va);
                break;
            case 2:
                e = X86_64_PDPT_BASE(va);
                break;
            case 3:
                e = X86_64_PDIR_BASE(va);
                break;
            case 4:
                e = X86_64_PTABLE_BASE(va);
                break;
        }
        // The final level holds the leaf entry; stop descending.
        if (current_level == levels) break;

        // Shadow slot empty: allocate and map the next-level table.
        if (vtp[e + PTABLE_SIZE] == 0) {
            // Two pages: table plus shadow slots. NOTE(review): non-leaf
            // next-level tables are allocated double that (4 pages); the
            // reason is not evident here — confirm against the sl_* layout.
            int bytes = 2 * BASE_PAGE_SIZE;
            bytes = (current_level == (levels-1)) ?  bytes : 2 * bytes;

            errval_t err = frame_alloc(&pe_frame, bytes, NULL);
            assert(err_is_ok(err));

            void *vbase;
            err = vspace_map_one_frame_attr(&vbase, bytes, pe_frame,
                            vtd_map_attr, NULL, NULL);
            assert(err_is_ok(err));
            assert(((lvaddr_t)vbase & BASE_PAGE_MASK) == 0);

            err = frame_identify(pe_frame, &pe_id);
            assert(err_is_ok(err));
            assert((pe_id.base & BASE_PAGE_MASK) == 0);

            // Point the hardware entry at the new table, and remember the
            // table's virtual address in the shadow slot.
            union sl_pdir_entry *entry = (union sl_pdir_entry *)vtp + e;
            sl_map_table(entry, pe_id.base);
            vtp[e + PTABLE_SIZE] = (lvaddr_t)vbase;
        }
        // Descend via the shadow slot.
        vtp = (lvaddr_t *)vtp[e + PTABLE_SIZE];
    }

    union sl_ptable_entry *entry = (union sl_ptable_entry *)vtp + e;
    paging_sl_flags_t flags = SL_PTABLE_READ | SL_PTABLE_WRITE;

    // Install the leaf entry with the page size implied by the depth.
    switch (levels) {
        case 2:
            sl_map_large30(entry, pa, flags);
        break;
        case 3:
            sl_map_large21(entry, pa, flags);
        break;
        case 4:
            sl_map(entry, pa, flags);
        break;
    }

    return (1UL << vtd_levels_to_page_bits(levels));
}
467
468// Returns the minimum number of supported page bits among all hardware
469// units.
470static int vtd_page_bits(struct vtd_unit *head)
471{
472    assert(head != NULL);
473    int num_page_bits = 30;
474
475    struct vtd_unit *u = head;
476    VTD_FOR_EACH(u, head) {
477      int unit_page_bits;
478      if (vtd_CAP_sllps30_rdf(u->regset)) {
479          unit_page_bits = 30;
480      } else if (vtd_CAP_sllps21_rdf(u->regset)) {
481	  unit_page_bits = 21;
482      } else {
483          unit_page_bits = 12;
484      }
485      num_page_bits = MIN(num_page_bits, unit_page_bits);
486    }
487    return num_page_bits;
488}
489
490// Create the identity domain along with its identity pagetable if there
491// is at least one remapping unit present.
492static void vtd_create_identity_domain(void)
493{
494    int levels = 4;
495    int page_bits = vtd_page_bits(vtd_units);
496    levels -= (page_bits - BASE_PAGE_BITS) / SL_PTABLE_MASK_BITS;
497
498    // Map only the first 1<<46 GB of physical memory. Attempting to map
499    // the entire address space with this current implementation is
500    // infeasible.
501    uint64_t max_addr = 1ULL << 46;
502    errval_t err;
503    struct frame_identity pe_frame_id;
504    struct capref pe_frame;
505    void *pe_vaddr;
506    err = frame_alloc(&pe_frame, 256 * BASE_PAGE_SIZE, NULL);
507    assert(err_is_ok(err));
508    err = frame_identify(pe_frame, &pe_frame_id);
509    assert(err_is_ok(err));
510    err = vspace_map_one_frame_attr(&pe_vaddr, pe_frame_id.bytes, pe_frame,
511				    vtd_map_attr, NULL, NULL);
512    assert(err_is_ok(err));
513    assert((pe_frame_id.base & BASE_PAGE_MASK) == 0 &&
514	   ((lvaddr_t)pe_vaddr & BASE_PAGE_MASK) == 0);
515
516    struct capref empty_pml4;
517    err = slot_alloc(&empty_pml4);
518    assert(err_is_ok(err));
519    err = vnode_create(empty_pml4, ObjType_VNode_x86_64_pml4);
520    assert(err_is_ok(err));
521
522    id_pagetable_vbase = (lvaddr_t *)pe_vaddr;
523
524    identity_domain = vtd_new_domain(domains->min_did, pe_frame_id.base, empty_pml4, vtd_units);
525    assert(domains != NULL);
526    assert(domain_list_empty(domains));
527    domains->head = identity_domain;
528    domains->tail = identity_domain;
529
530    uint64_t mapped, remaining, vaddr, paddr;
531    vaddr = 0, paddr = 0;
532    remaining = max_addr;
533    while (remaining > 0) {
534        mapped = vtd_map(vaddr, paddr, id_pagetable_vbase, levels);
535        vaddr += mapped;
536        paddr += mapped;
537        remaining -= mapped;
538    }
539}
540
541// Called to add the devices specified in the translation structures to
542// the identity domain.
543static void vtd_add_dmar_devices(void)
544{
545    errval_t err;
546    err = skb_client_connect();
547    assert(err_is_ok(err));
548
549    err = skb_execute_query("dmar_devices(L),""length(L,Len),writeln(L)");
550    assert(err_is_ok(err));
551
552    struct list_parser_status status;
553    skb_read_list_init(&status);
554
555    int seg, bus, dev, func;
556    while(skb_read_list(&status, "address(%"PRIu32",%"PRIu32",%"PRIu32",%"PRIu32")",
557			&seg, &bus, &dev, &func)) {
558        err = vtd_domain_add_device(seg, bus, dev, func, identity_domain->pml4);
559        assert(err == VTD_ERR_DEV_USED || err == SYS_ERR_OK);
560    }
561}
562
563// Given the address of a PCIe bridge, return the bus downstream of it.
564static int vtd_find_secondary_bus(int bus, int dev, int func)
565{
566    errval_t err;
567    err = skb_execute_query("bridge(PCIE,addr(%d,%d,%d),_,_,_,_,_,secondary(BUS)),"
568			    "write(secondary_bus(BUS)).", bus, dev, func);
569    assert(err_is_ok(err));
570
571    int next_bus;
572    err = skb_read_output("secondary_bus(%d)", &next_bus);
573    assert(err_is_ok(err));
574
575    return next_bus;
576}
577
578// Parses a Path structure, comprised of (Device number, Function number) pairs,
579// representing the hierarchical path of a device. The address of the device
580// is returned in bus, dev, and func.
581static void vtd_parse_dev_path(int begin_bus, int *bus, int *dev, int *func, char *begin, char *end)
582{
583    assert((bus != NULL) && (dev != NULL) && (func != NULL));
584    int curr_bus, curr_dev, curr_func;
585    ACPI_DMAR_PCI_PATH *path_entry;
586
587    path_entry = (ACPI_DMAR_PCI_PATH *)begin;
588
589    curr_bus  = begin_bus;
590    curr_dev  = path_entry->Device;
591    curr_func = path_entry->Function;
592
593    path_entry = (ACPI_DMAR_PCI_PATH *)((char *)path_entry + sizeof(ACPI_DMAR_PCI_PATH));
594    while ((char *)path_entry != end) {
595        curr_bus  = vtd_find_secondary_bus(curr_bus, curr_dev, curr_func);
596	curr_dev  = path_entry->Device;
597	curr_func = path_entry->Function;
598
599	path_entry = (ACPI_DMAR_PCI_PATH *)((char *)path_entry + sizeof(ACPI_DMAR_PCI_PATH));
600    }
601    *bus  = curr_bus;
602    *dev  = curr_dev;
603    *func = curr_func;
604}
605
606// Parses a Device Scope Structure belonging to a remapping structure.
607static void vtd_parse_dev_scope_structure(int index, int segment, char *begin, char *end, enum AcpiDmarType type)
608{
609    errval_t err;
610    int path_length;
611    ACPI_DMAR_DEVICE_SCOPE *entry;
612    ACPI_DMAR_PCI_PATH *path_begin, *path_end;
613
614    entry = (ACPI_DMAR_DEVICE_SCOPE *)begin;
615    while ((char *)entry != end) {
616        path_length = entry->Length - sizeof(ACPI_DMAR_DEVICE_SCOPE);
617	assert(path_length == 2);
618
619	path_begin = (ACPI_DMAR_PCI_PATH *)((char *)entry + sizeof(ACPI_DMAR_DEVICE_SCOPE));
620	path_end   = (ACPI_DMAR_PCI_PATH *)((char *)path_begin + path_length);
621
622	int bus, dev, func;
623	vtd_parse_dev_path(entry->Bus, &bus, &dev, &func, (char *)path_begin, (char *)path_end);
624
625	err = skb_execute_query("dmar_device(%d, %"PRIu8",%"PRIu8","
626				"addr(%"PRIu32",%"PRIu32",%"PRIu32",%"PRIu32"),%"PRIu8").",
627				index, type, entry->EntryType, segment, bus, dev, func, entry->EnumerationId);
628
629	// A device may have already been reported to the SKB for an earlier
630	// translation structure.
631	if (err_is_fail(err)) {
632	    skb_add_fact("dmar_device(%d, %"PRIu8",%"PRIu8","
633			 "addr(%"PRIu32",%"PRIu32",%"PRIu32",%"PRIu32"),%"PRIu8").",
634			 index, type, entry->EntryType, segment, bus, dev, func, entry->EnumerationId);
635	    VTD_DEBUG("Adding device %d:%d:%d:%d\n", segment, bus, dev, func);
636	}
637
638	entry = (ACPI_DMAR_DEVICE_SCOPE *)((char *)entry + entry->Length);
639    }
640}
641
642// Parses a DMA Remapping Hardware Unit (DRHD) structure. There is at least one
643// such structure for each PCI segment.
644static void vtd_parse_drhd_structure(int index, char *begin, char *end)
645{
646    ACPI_DMAR_HARDWARE_UNIT *drhd;
647    struct vtd_unit *new_unit;
648
649    drhd = (ACPI_DMAR_HARDWARE_UNIT *)begin;
650
651    skb_add_fact("dmar_hardware_unit(%d, %"PRIu8", %"PRIu16", %"PRIu64").",
652		 index, drhd->Flags, drhd->Segment, drhd->Address);
653
654    new_unit = vtd_create_unit(vtd_map_registers(drhd->Address), drhd->Segment);
655    vtd_insert_context_tables(new_unit);
656    VTD_ADD_UNIT(new_unit, vtd_units);
657
658    vtd_parse_dev_scope_structure(index, drhd->Segment, begin + sizeof(ACPI_DMAR_HARDWARE_UNIT),
659				  end, ACPI_DMAR_TYPE_HARDWARE_UNIT);
660
661#ifdef VTD_DEBUG_
662    vtd_dump_registers(new_unit);
663#endif
664}
665
666// Parses a Reserved Memory Region Reporting (RMRR) remapping structure.
667// Reserved Memory Region Reporting structures report reserved memory regions for
668// devices that are each under the scope of some remapping hardware unit.
669static void vtd_parse_rmrr_structure(int index, char *begin, char *end)
670{
671    ACPI_DMAR_RESERVED_MEMORY *rmrr;
672    rmrr = (ACPI_DMAR_RESERVED_MEMORY *)begin;
673    skb_add_fact("dmar_reserved_memory(%d, %"PRIu16", %"PRIu64", %"PRIu64").",
674		 index, rmrr->Segment, rmrr->BaseAddress, rmrr->EndAddress);
675    vtd_parse_dev_scope_structure(index, rmrr->Segment, begin + sizeof(ACPI_DMAR_RESERVED_MEMORY),
676				  end, ACPI_DMAR_TYPE_RESERVED_MEMORY);
677}
678
679// Parses a Root Port ATS Capability Reporting (ATSR) structure.
680// An ATSR structure is provided for each PCI segment supporting Device-TLBs. Currently,
681// we only report the PCI segments supporting Device-TLBs and the associated PCIe
682// Root-Ports to the SKB.
683static void vtd_parse_atsr_structure(int index, char *begin, char *end)
684{
685    ACPI_DMAR_ATSR *atsr;
686    atsr = (ACPI_DMAR_ATSR *)begin;
687    skb_add_fact("dmar_atsr(%d, %"PRIu8", %"PRIu16").", index, atsr->Flags, atsr->Segment);
688    if (atsr->Flags == ACPI_DMAR_ALL_PORTS) {
689        return;
690    }
691    vtd_parse_dev_scope_structure(index, atsr->Segment, begin + sizeof(ACPI_DMAR_ATSR),
692				  end, ACPI_DMAR_TYPE_ROOT_ATS);
693}
694
695// Parses a Remapping Hardware Static Affinity (RHSA) structure.
696// RHSA structures are optional and are for platforms supporting non-uniform memory.
697// Currently, we only report the proximity domain each hardware unit belongs to(identified
698// by the base address of its register set) to the SKB.
699static void vtd_parse_rhsa_structure(int index, char *begin, char *end)
700{
701    ACPI_DMAR_RHSA *rhsa;
702    rhsa = (ACPI_DMAR_RHSA *)begin;
703    skb_add_fact("dmar_rhsa(%"PRIu64", %"PRIu32").", rhsa->BaseAddress, rhsa->ProximityDomain);
704}
705
706// Parses an ACPI Name-space Device Declaration structure (ANDD).
707// Currently, we only add the information about each ACPI name-space enumerated device
708// to the SKB.
709static void vtd_parse_andd_structure(int index, char *begin, char *end)
710{
711    ACPI_DMAR_ANDD *andd;
712    andd = (ACPI_DMAR_ANDD *)begin;
713    skb_add_fact("dmar_andd(%"PRIu8", %s).", andd->DeviceNumber, andd->DeviceName);
714}
715
716// Parses the DMA Remapping Reporting (DMAR) ACPI table.
717static ACPI_STATUS vtd_parse_dmar_table(void)
718{
719    ACPI_STATUS status;
720    ACPI_TABLE_DMAR *dmar;
721    ACPI_DMAR_HEADER *header;
722    char *structure, *structure_end;
723
724    status = AcpiGetTable(ACPI_SIG_DMAR, 0, (ACPI_TABLE_HEADER **)&dmar);
725    if (ACPI_FAILURE(status)) {
726        VTD_DEBUG("Failure in retrieving DMAR table.\n");
727        return status;
728    }
729
730    skb_add_fact("dmar(%"PRIu8")", dmar->Flags);
731
732    structure = (char *)dmar + sizeof(ACPI_TABLE_DMAR);
733    int dmar_unit_index = 0;
734    while (structure != ((char *)dmar + dmar->Header.Length)) {
735        header = (ACPI_DMAR_HEADER *)structure;
736        structure_end = structure + header->Length;
737
738        switch (header->Type) {
739        case ACPI_DMAR_TYPE_HARDWARE_UNIT:
740            vtd_parse_drhd_structure(dmar_unit_index, structure, structure_end);
741            break;
742        case ACPI_DMAR_TYPE_RESERVED_MEMORY:
743            vtd_parse_rmrr_structure(dmar_unit_index, structure, structure_end);
744            break;
745        case ACPI_DMAR_TYPE_ROOT_ATS:
746            vtd_parse_atsr_structure(dmar_unit_index, structure, structure_end);
747            break;
748        case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
749            vtd_parse_rhsa_structure(dmar_unit_index, structure, structure_end);
750            break;
751        case ACPI_DMAR_TYPE_NAMESPACE:
752            vtd_parse_andd_structure(dmar_unit_index, structure, structure_end);
753            break;
754        default: assert(!"Reserved for future use!\n");
755        }
756
757        structure = structure_end;
758        dmar_unit_index++;
759    }
760    return AE_OK;
761}
762
763// Add devices on this platform to the identity domain
764void vtd_identity_domain_add_devices(void)
765{
766    if (!vtd_enabled) return;
767    if (vtd_no_units(vtd_units)) return;
768
769    errval_t err;
770    err = skb_client_connect();
771    assert(err_is_ok(err));
772
773    // Add PCIe-to-PCIe bridges to the identity domain.
774    err = skb_execute_query("pcie_bridges(L),length(L,Len),writeln(L)");
775    assert(err_is_ok(err));
776
777    struct list_parser_status status;
778    skb_read_list_init(&status);
779
780    int bus, dev, func;
781    while(skb_read_list(&status, "address(%"PRIu32",%"PRIu32",%"PRIu32")", &bus, &dev, &func)) {
782        VTD_DEBUG("adding device (bridge) %d:%d:%d to the identity domain.\n", bus, dev, func);
783	err = vtd_domain_add_device(0, bus, dev, func, identity_domain->pml4);
784	assert(err == VTD_ERR_DEV_USED || err == SYS_ERR_OK);
785    }
786
787    err = skb_execute_query("find_devices(L),length(L,Len),writeln(L)");
788    assert(err_is_ok(err));
789
790    skb_read_list_init(&status);
791
792    // Add all PCIe devices present on the platform to the identity domain. Since PCI
793    // devices behind PCIe-to-PCI/PCI-X bridges and conventional PCI bridges have the
794    // same source-id on their transactions, only add endpoint PCI devices on the root
795    // bus.
796    char s_type[5];
797    while(skb_read_list(&status, "address(%[a-z],%"PRIu32",%"PRIu32",%"PRIu32")", s_type, &bus, &dev, &func)) {
798        VTD_DEBUG("adding %s device %d:%d:%d to the identity domain.\n", s_type, bus, dev, func);
799	if (!strncmp(s_type, "pcie", strlen("pcie"))) {
800	    err = vtd_domain_add_device(0, bus, dev, func, identity_domain->pml4);
801	    assert(err == VTD_ERR_DEV_USED || err == SYS_ERR_OK);
802	} else if (!strncmp(s_type, "pci", strlen("pci"))) {
803	    if (bus == 0) {
804	        err = vtd_domain_add_device(0, bus, dev, func, identity_domain->pml4);
805		assert(err == VTD_ERR_DEV_USED || err == SYS_ERR_OK);
806	    }
807	}
808    }
809}
810
811int vtd_init(void)
812{
813    ACPI_STATUS as;
814    vtd_map_attr = (cache_coherence ? VREGION_FLAGS_READ_WRITE :
815			              VREGION_FLAGS_READ_WRITE_NOCACHE);
816    as = vtd_parse_dmar_table();
817    if (ACPI_FAILURE(as)) {
818        return 1;
819    }
820
821    if (vtd_units == NULL) {
822        VTD_DEBUG("DMA remapping: no HW units, not enabling\n");
823        return 1;
824    }
825
826    if (vtd_page_bits(vtd_units) < 21) {
827        VTD_DEBUG("VT-d: no large page support, not enabling\n");
828        return 1;
829    }
830
831    vtd_enabled = true;
832
833    // When we have finished parsing the DMAR table, we create the identity
834    // domain and determine the domain-id bounds that can be used on all
835    // hardware units.
836    domains = vtd_new_domain_list();
837    vtd_create_did_bounds(vtd_units, domains);
838    vtd_create_identity_domain();
839    vtd_add_dmar_devices();
840
841    struct vtd_unit *u = NULL;
842    VTD_FOR_EACH(u, vtd_units) {
843        vtd_set_root_table(u);
844	vtd_trnsl_enable(u);
845	skb_add_fact("vtd_enabled(%"PRIu16",%"PRIu8").", u->pci_seg, vtd_coherency(u));
846    }
847
848    VTD_DEBUG("Enabling DMA remapping succeeded\n");
849
850    return 0;
851}
852