/*-
 * Copyright (c) 2013 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
28257251Skib */ 29257251Skib 30257251Skib#include <sys/cdefs.h> 31257251Skib__FBSDID("$FreeBSD$"); 32257251Skib 33257251Skib#include <sys/param.h> 34257251Skib#include <sys/systm.h> 35257251Skib#include <sys/malloc.h> 36257251Skib#include <sys/bus.h> 37257251Skib#include <sys/interrupt.h> 38257251Skib#include <sys/kernel.h> 39257251Skib#include <sys/ktr.h> 40257251Skib#include <sys/lock.h> 41257251Skib#include <sys/memdesc.h> 42257251Skib#include <sys/mutex.h> 43257251Skib#include <sys/proc.h> 44257251Skib#include <sys/rwlock.h> 45257251Skib#include <sys/rman.h> 46257251Skib#include <sys/sf_buf.h> 47257251Skib#include <sys/sysctl.h> 48257251Skib#include <sys/taskqueue.h> 49257251Skib#include <sys/tree.h> 50257251Skib#include <sys/uio.h> 51280260Skib#include <sys/vmem.h> 52257251Skib#include <vm/vm.h> 53257251Skib#include <vm/vm_extern.h> 54257251Skib#include <vm/vm_kern.h> 55257251Skib#include <vm/vm_object.h> 56257251Skib#include <vm/vm_page.h> 57257251Skib#include <vm/vm_pager.h> 58257251Skib#include <vm/vm_map.h> 59257251Skib#include <machine/atomic.h> 60257251Skib#include <machine/bus.h> 61257251Skib#include <machine/cpu.h> 62257251Skib#include <machine/md_var.h> 63257251Skib#include <machine/specialreg.h> 64257251Skib#include <x86/include/busdma_impl.h> 65257251Skib#include <x86/iommu/intel_reg.h> 66257251Skib#include <x86/iommu/busdma_dmar.h> 67257251Skib#include <x86/iommu/intel_dmar.h> 68257251Skib 69284869Skibstatic int domain_unmap_buf_locked(struct dmar_domain *domain, 70284869Skib dmar_gaddr_t base, dmar_gaddr_t size, int flags); 71257251Skib 72257251Skib/* 73257251Skib * The cache of the identity mapping page tables for the DMARs. Using 74257251Skib * the cache saves significant amount of memory for page tables by 75257251Skib * reusing the page tables, since usually DMARs are identical and have 76257251Skib * the same capabilities. 
Still, cache records the information needed 77257251Skib * to match DMAR capabilities and page table format, to correctly 78257251Skib * handle different DMARs. 79257251Skib */ 80257251Skib 81257251Skibstruct idpgtbl { 82257251Skib dmar_gaddr_t maxaddr; /* Page table covers the guest address 83257251Skib range [0..maxaddr) */ 84257251Skib int pglvl; /* Total page table levels ignoring 85257251Skib superpages */ 86257251Skib int leaf; /* The last materialized page table 87257251Skib level, it is non-zero if superpages 88257251Skib are supported */ 89257251Skib vm_object_t pgtbl_obj; /* The page table pages */ 90257251Skib LIST_ENTRY(idpgtbl) link; 91257251Skib}; 92257251Skib 93257251Skibstatic struct sx idpgtbl_lock; 94257251SkibSX_SYSINIT(idpgtbl, &idpgtbl_lock, "idpgtbl"); 95257251Skibstatic LIST_HEAD(, idpgtbl) idpgtbls = LIST_HEAD_INITIALIZER(idpgtbls); 96257251Skibstatic MALLOC_DEFINE(M_DMAR_IDPGTBL, "dmar_idpgtbl", 97257251Skib "Intel DMAR Identity mappings cache elements"); 98257251Skib 99257251Skib/* 100257251Skib * Build the next level of the page tables for the identity mapping. 101257251Skib * - lvl is the level to build; 102257251Skib * - idx is the index of the page table page in the pgtbl_obj, which is 103257251Skib * being allocated filled now; 104257251Skib * - addr is the starting address in the bus address space which is 105257251Skib * mapped by the page table page. 
106257251Skib */ 107257251Skibstatic void 108284869Skibdomain_idmap_nextlvl(struct idpgtbl *tbl, int lvl, vm_pindex_t idx, 109257251Skib dmar_gaddr_t addr) 110257251Skib{ 111283735Skib vm_page_t m1; 112257251Skib dmar_pte_t *pte; 113257251Skib struct sf_buf *sf; 114257251Skib dmar_gaddr_t f, pg_sz; 115257251Skib vm_pindex_t base; 116257251Skib int i; 117257251Skib 118257251Skib VM_OBJECT_ASSERT_LOCKED(tbl->pgtbl_obj); 119257251Skib if (addr >= tbl->maxaddr) 120257251Skib return; 121283735Skib (void)dmar_pgalloc(tbl->pgtbl_obj, idx, DMAR_PGF_OBJL | DMAR_PGF_WAITOK | 122257251Skib DMAR_PGF_ZERO); 123257251Skib base = idx * DMAR_NPTEPG + 1; /* Index of the first child page of idx */ 124257251Skib pg_sz = pglvl_page_size(tbl->pglvl, lvl); 125257251Skib if (lvl != tbl->leaf) { 126257251Skib for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) 127284869Skib domain_idmap_nextlvl(tbl, lvl + 1, base + i, f); 128257251Skib } 129257251Skib VM_OBJECT_WUNLOCK(tbl->pgtbl_obj); 130257251Skib pte = dmar_map_pgtbl(tbl->pgtbl_obj, idx, DMAR_PGF_WAITOK, &sf); 131257251Skib if (lvl == tbl->leaf) { 132257251Skib for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) { 133257251Skib if (f >= tbl->maxaddr) 134257251Skib break; 135257251Skib pte[i].pte = (DMAR_PTE_ADDR_MASK & f) | 136257251Skib DMAR_PTE_R | DMAR_PTE_W; 137257251Skib } 138257251Skib } else { 139257251Skib for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) { 140257251Skib if (f >= tbl->maxaddr) 141257251Skib break; 142257251Skib m1 = dmar_pgalloc(tbl->pgtbl_obj, base + i, 143257251Skib DMAR_PGF_NOALLOC); 144257251Skib KASSERT(m1 != NULL, ("lost page table page")); 145257251Skib pte[i].pte = (DMAR_PTE_ADDR_MASK & 146257251Skib VM_PAGE_TO_PHYS(m1)) | DMAR_PTE_R | DMAR_PTE_W; 147257251Skib } 148257251Skib } 149284869Skib /* domain_get_idmap_pgtbl flushes CPU cache if needed. 
*/ 150277023Skib dmar_unmap_pgtbl(sf); 151257251Skib VM_OBJECT_WLOCK(tbl->pgtbl_obj); 152257251Skib} 153257251Skib 154257251Skib/* 155257251Skib * Find a ready and compatible identity-mapping page table in the 156257251Skib * cache. If not found, populate the identity-mapping page table for 157257251Skib * the context, up to the maxaddr. The maxaddr byte is allowed to be 158257251Skib * not mapped, which is aligned with the definition of Maxmem as the 159257251Skib * highest usable physical address + 1. If superpages are used, the 160257251Skib * maxaddr is typically mapped. 161257251Skib */ 162257251Skibvm_object_t 163284869Skibdomain_get_idmap_pgtbl(struct dmar_domain *domain, dmar_gaddr_t maxaddr) 164257251Skib{ 165257251Skib struct dmar_unit *unit; 166257251Skib struct idpgtbl *tbl; 167257251Skib vm_object_t res; 168257251Skib vm_page_t m; 169257251Skib int leaf, i; 170257251Skib 171257900Sdim leaf = 0; /* silence gcc */ 172257900Sdim 173257251Skib /* 174257251Skib * First, determine where to stop the paging structures. 175257251Skib */ 176284869Skib for (i = 0; i < domain->pglvl; i++) { 177284869Skib if (i == domain->pglvl - 1 || domain_is_sp_lvl(domain, i)) { 178257251Skib leaf = i; 179257251Skib break; 180257251Skib } 181257251Skib } 182257251Skib 183257251Skib /* 184257251Skib * Search the cache for a compatible page table. Qualified 185257251Skib * page table must map up to maxaddr, its level must be 186257251Skib * supported by the DMAR and leaf should be equal to the 187257251Skib * calculated value. The later restriction could be lifted 188257251Skib * but I believe it is currently impossible to have any 189257251Skib * deviations for existing hardware. 
190257251Skib */ 191257251Skib sx_slock(&idpgtbl_lock); 192257251Skib LIST_FOREACH(tbl, &idpgtbls, link) { 193257251Skib if (tbl->maxaddr >= maxaddr && 194284869Skib dmar_pglvl_supported(domain->dmar, tbl->pglvl) && 195257251Skib tbl->leaf == leaf) { 196257251Skib res = tbl->pgtbl_obj; 197257251Skib vm_object_reference(res); 198257251Skib sx_sunlock(&idpgtbl_lock); 199284869Skib domain->pglvl = tbl->pglvl; /* XXXKIB ? */ 200257251Skib goto end; 201257251Skib } 202257251Skib } 203257251Skib 204257251Skib /* 205257251Skib * Not found in cache, relock the cache into exclusive mode to 206257251Skib * be able to add element, and recheck cache again after the 207257251Skib * relock. 208257251Skib */ 209257251Skib sx_sunlock(&idpgtbl_lock); 210257251Skib sx_xlock(&idpgtbl_lock); 211257251Skib LIST_FOREACH(tbl, &idpgtbls, link) { 212257251Skib if (tbl->maxaddr >= maxaddr && 213284869Skib dmar_pglvl_supported(domain->dmar, tbl->pglvl) && 214257251Skib tbl->leaf == leaf) { 215257251Skib res = tbl->pgtbl_obj; 216257251Skib vm_object_reference(res); 217257251Skib sx_xunlock(&idpgtbl_lock); 218284869Skib domain->pglvl = tbl->pglvl; /* XXXKIB ? */ 219257251Skib return (res); 220257251Skib } 221257251Skib } 222257251Skib 223257251Skib /* 224257251Skib * Still not found, create new page table. 
225257251Skib */ 226257251Skib tbl = malloc(sizeof(*tbl), M_DMAR_IDPGTBL, M_WAITOK); 227284869Skib tbl->pglvl = domain->pglvl; 228257251Skib tbl->leaf = leaf; 229257251Skib tbl->maxaddr = maxaddr; 230257251Skib tbl->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL, 231257251Skib IDX_TO_OFF(pglvl_max_pages(tbl->pglvl)), 0, 0, NULL); 232257251Skib VM_OBJECT_WLOCK(tbl->pgtbl_obj); 233284869Skib domain_idmap_nextlvl(tbl, 0, 0, 0); 234257251Skib VM_OBJECT_WUNLOCK(tbl->pgtbl_obj); 235257251Skib LIST_INSERT_HEAD(&idpgtbls, tbl, link); 236257251Skib res = tbl->pgtbl_obj; 237257251Skib vm_object_reference(res); 238257251Skib sx_xunlock(&idpgtbl_lock); 239257251Skib 240257251Skibend: 241257251Skib /* 242257251Skib * Table was found or created. 243257251Skib * 244257251Skib * If DMAR does not snoop paging structures accesses, flush 245257251Skib * CPU cache to memory. Note that dmar_unmap_pgtbl() coherent 246257251Skib * argument was possibly invalid at the time of the identity 247257251Skib * page table creation, since DMAR which was passed at the 248257251Skib * time of creation could be coherent, while current DMAR is 249257251Skib * not. 250257251Skib * 251257251Skib * If DMAR cannot look into the chipset write buffer, flush it 252257251Skib * as well. 253257251Skib */ 254284869Skib unit = domain->dmar; 255257251Skib if (!DMAR_IS_COHERENT(unit)) { 256257251Skib VM_OBJECT_WLOCK(res); 257257251Skib for (m = vm_page_lookup(res, 0); m != NULL; 258257251Skib m = vm_page_next(m)) 259257251Skib pmap_invalidate_cache_pages(&m, 1); 260257251Skib VM_OBJECT_WUNLOCK(res); 261257251Skib } 262257251Skib if ((unit->hw_cap & DMAR_CAP_RWBF) != 0) { 263257251Skib DMAR_LOCK(unit); 264257251Skib dmar_flush_write_bufs(unit); 265257251Skib DMAR_UNLOCK(unit); 266257251Skib } 267257251Skib 268257251Skib return (res); 269257251Skib} 270257251Skib 271257251Skib/* 272257251Skib * Return a reference to the identity mapping page table to the cache. 
273257251Skib */ 274257251Skibvoid 275257251Skibput_idmap_pgtbl(vm_object_t obj) 276257251Skib{ 277257251Skib struct idpgtbl *tbl, *tbl1; 278257251Skib vm_object_t rmobj; 279257251Skib 280257251Skib sx_slock(&idpgtbl_lock); 281257251Skib KASSERT(obj->ref_count >= 2, ("lost cache reference")); 282257251Skib vm_object_deallocate(obj); 283257251Skib 284257251Skib /* 285257251Skib * Cache always owns one last reference on the page table object. 286257251Skib * If there is an additional reference, object must stay. 287257251Skib */ 288257251Skib if (obj->ref_count > 1) { 289257251Skib sx_sunlock(&idpgtbl_lock); 290257251Skib return; 291257251Skib } 292257251Skib 293257251Skib /* 294257251Skib * Cache reference is the last, remove cache element and free 295257251Skib * page table object, returning the page table pages to the 296257251Skib * system. 297257251Skib */ 298257251Skib sx_sunlock(&idpgtbl_lock); 299257251Skib sx_xlock(&idpgtbl_lock); 300257251Skib LIST_FOREACH_SAFE(tbl, &idpgtbls, link, tbl1) { 301257251Skib rmobj = tbl->pgtbl_obj; 302257251Skib if (rmobj->ref_count == 1) { 303257251Skib LIST_REMOVE(tbl, link); 304257251Skib atomic_subtract_int(&dmar_tbl_pagecnt, 305257251Skib rmobj->resident_page_count); 306257251Skib vm_object_deallocate(rmobj); 307257251Skib free(tbl, M_DMAR_IDPGTBL); 308257251Skib } 309257251Skib } 310257251Skib sx_xunlock(&idpgtbl_lock); 311257251Skib} 312257251Skib 313257251Skib/* 314257251Skib * The core routines to map and unmap host pages at the given guest 315257251Skib * address. Support superpages. 316257251Skib */ 317257251Skib 318257251Skib/* 319257251Skib * Index of the pte for the guest address base in the page table at 320257251Skib * the level lvl. 
321257251Skib */ 322257251Skibstatic int 323284869Skibdomain_pgtbl_pte_off(struct dmar_domain *domain, dmar_gaddr_t base, int lvl) 324257251Skib{ 325257251Skib 326284869Skib base >>= DMAR_PAGE_SHIFT + (domain->pglvl - lvl - 1) * 327284869Skib DMAR_NPTEPGSHIFT; 328257251Skib return (base & DMAR_PTEMASK); 329257251Skib} 330257251Skib 331257251Skib/* 332257251Skib * Returns the page index of the page table page in the page table 333257251Skib * object, which maps the given address base at the page table level 334257251Skib * lvl. 335257251Skib */ 336257251Skibstatic vm_pindex_t 337284869Skibdomain_pgtbl_get_pindex(struct dmar_domain *domain, dmar_gaddr_t base, int lvl) 338257251Skib{ 339257251Skib vm_pindex_t idx, pidx; 340257251Skib int i; 341257251Skib 342284869Skib KASSERT(lvl >= 0 && lvl < domain->pglvl, 343284869Skib ("wrong lvl %p %d", domain, lvl)); 344257251Skib 345284869Skib for (pidx = idx = 0, i = 0; i < lvl; i++, pidx = idx) { 346284869Skib idx = domain_pgtbl_pte_off(domain, base, i) + 347284869Skib pidx * DMAR_NPTEPG + 1; 348284869Skib } 349257251Skib return (idx); 350257251Skib} 351257251Skib 352257251Skibstatic dmar_pte_t * 353284869Skibdomain_pgtbl_map_pte(struct dmar_domain *domain, dmar_gaddr_t base, int lvl, 354284869Skib int flags, vm_pindex_t *idxp, struct sf_buf **sf) 355257251Skib{ 356257251Skib vm_page_t m; 357257251Skib struct sf_buf *sfp; 358257251Skib dmar_pte_t *pte, *ptep; 359257251Skib vm_pindex_t idx, idx1; 360257251Skib 361284869Skib DMAR_DOMAIN_ASSERT_PGLOCKED(domain); 362257251Skib KASSERT((flags & DMAR_PGF_OBJL) != 0, ("lost PGF_OBJL")); 363257251Skib 364284869Skib idx = domain_pgtbl_get_pindex(domain, base, lvl); 365257251Skib if (*sf != NULL && idx == *idxp) { 366257251Skib pte = (dmar_pte_t *)sf_buf_kva(*sf); 367257251Skib } else { 368257251Skib if (*sf != NULL) 369277023Skib dmar_unmap_pgtbl(*sf); 370257251Skib *idxp = idx; 371257251Skibretry: 372284869Skib pte = dmar_map_pgtbl(domain->pgtbl_obj, idx, flags, sf); 373257251Skib if 
(pte == NULL) { 374284869Skib KASSERT(lvl > 0, 375284869Skib ("lost root page table page %p", domain)); 376257251Skib /* 377286777Skib * Page table page does not exist, allocate 378286777Skib * it and create a pte in the preceeding page level 379286777Skib * to reference the allocated page table page. 380257251Skib */ 381284869Skib m = dmar_pgalloc(domain->pgtbl_obj, idx, flags | 382257251Skib DMAR_PGF_ZERO); 383257251Skib if (m == NULL) 384257251Skib return (NULL); 385257251Skib 386257251Skib /* 387257251Skib * Prevent potential free while pgtbl_obj is 388257251Skib * unlocked in the recursive call to 389284869Skib * domain_pgtbl_map_pte(), if other thread did 390285724Skib * pte write and clean while the lock is 391257251Skib * dropped. 392257251Skib */ 393257251Skib m->wire_count++; 394257251Skib 395257251Skib sfp = NULL; 396284869Skib ptep = domain_pgtbl_map_pte(domain, base, lvl - 1, 397284869Skib flags, &idx1, &sfp); 398257251Skib if (ptep == NULL) { 399257251Skib KASSERT(m->pindex != 0, 400284869Skib ("loosing root page %p", domain)); 401257251Skib m->wire_count--; 402284869Skib dmar_pgfree(domain->pgtbl_obj, m->pindex, 403284869Skib flags); 404257251Skib return (NULL); 405257251Skib } 406257251Skib dmar_pte_store(&ptep->pte, DMAR_PTE_R | DMAR_PTE_W | 407257251Skib VM_PAGE_TO_PHYS(m)); 408284869Skib dmar_flush_pte_to_ram(domain->dmar, ptep); 409257251Skib sf_buf_page(sfp)->wire_count += 1; 410257251Skib m->wire_count--; 411277023Skib dmar_unmap_pgtbl(sfp); 412257251Skib /* Only executed once. 
*/ 413257251Skib goto retry; 414257251Skib } 415257251Skib } 416284869Skib pte += domain_pgtbl_pte_off(domain, base, lvl); 417257251Skib return (pte); 418257251Skib} 419257251Skib 420257251Skibstatic int 421284869Skibdomain_map_buf_locked(struct dmar_domain *domain, dmar_gaddr_t base, 422284869Skib dmar_gaddr_t size, vm_page_t *ma, uint64_t pflags, int flags) 423257251Skib{ 424257251Skib dmar_pte_t *pte; 425257251Skib struct sf_buf *sf; 426257251Skib dmar_gaddr_t pg_sz, base1, size1; 427257251Skib vm_pindex_t pi, c, idx, run_sz; 428257251Skib int lvl; 429257251Skib bool superpage; 430257251Skib 431284869Skib DMAR_DOMAIN_ASSERT_PGLOCKED(domain); 432257251Skib 433257251Skib base1 = base; 434257251Skib size1 = size; 435257251Skib flags |= DMAR_PGF_OBJL; 436257251Skib TD_PREP_PINNED_ASSERT; 437257251Skib 438257251Skib for (sf = NULL, pi = 0; size > 0; base += pg_sz, size -= pg_sz, 439257251Skib pi += run_sz) { 440257251Skib for (lvl = 0, c = 0, superpage = false;; lvl++) { 441284869Skib pg_sz = domain_page_size(domain, lvl); 442257251Skib run_sz = pg_sz >> DMAR_PAGE_SHIFT; 443284869Skib if (lvl == domain->pglvl - 1) 444257251Skib break; 445257251Skib /* 446257251Skib * Check if the current base suitable for the 447257251Skib * superpage mapping. First, verify the level. 448257251Skib */ 449284869Skib if (!domain_is_sp_lvl(domain, lvl)) 450257251Skib continue; 451257251Skib /* 452257251Skib * Next, look at the size of the mapping and 453257251Skib * alignment of both guest and host addresses. 454257251Skib */ 455257251Skib if (size < pg_sz || (base & (pg_sz - 1)) != 0 || 456257251Skib (VM_PAGE_TO_PHYS(ma[pi]) & (pg_sz - 1)) != 0) 457257251Skib continue; 458257251Skib /* All passed, check host pages contiguouty. 
*/ 459257251Skib if (c == 0) { 460257251Skib for (c = 1; c < run_sz; c++) { 461257251Skib if (VM_PAGE_TO_PHYS(ma[pi + c]) != 462257251Skib VM_PAGE_TO_PHYS(ma[pi + c - 1]) + 463257251Skib PAGE_SIZE) 464257251Skib break; 465257251Skib } 466257251Skib } 467257251Skib if (c >= run_sz) { 468257251Skib superpage = true; 469257251Skib break; 470257251Skib } 471257251Skib } 472257251Skib KASSERT(size >= pg_sz, 473284869Skib ("mapping loop overflow %p %jx %jx %jx", domain, 474257251Skib (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz)); 475280434Skib KASSERT(pg_sz > 0, ("pg_sz 0 lvl %d", lvl)); 476284869Skib pte = domain_pgtbl_map_pte(domain, base, lvl, flags, &idx, &sf); 477257251Skib if (pte == NULL) { 478257251Skib KASSERT((flags & DMAR_PGF_WAITOK) == 0, 479284869Skib ("failed waitable pte alloc %p", domain)); 480277023Skib if (sf != NULL) 481277023Skib dmar_unmap_pgtbl(sf); 482284869Skib domain_unmap_buf_locked(domain, base1, base - base1, 483284869Skib flags); 484257251Skib TD_PINNED_ASSERT; 485257251Skib return (ENOMEM); 486257251Skib } 487257251Skib dmar_pte_store(&pte->pte, VM_PAGE_TO_PHYS(ma[pi]) | pflags | 488257251Skib (superpage ? 
DMAR_PTE_SP : 0)); 489284869Skib dmar_flush_pte_to_ram(domain->dmar, pte); 490257251Skib sf_buf_page(sf)->wire_count += 1; 491257251Skib } 492257251Skib if (sf != NULL) 493277023Skib dmar_unmap_pgtbl(sf); 494257251Skib TD_PINNED_ASSERT; 495257251Skib return (0); 496257251Skib} 497257251Skib 498257251Skibint 499284869Skibdomain_map_buf(struct dmar_domain *domain, dmar_gaddr_t base, dmar_gaddr_t size, 500257251Skib vm_page_t *ma, uint64_t pflags, int flags) 501257251Skib{ 502257512Skib struct dmar_unit *unit; 503257512Skib int error; 504257251Skib 505284869Skib unit = domain->dmar; 506257512Skib 507284869Skib KASSERT((domain->flags & DMAR_DOMAIN_IDMAP) == 0, 508284869Skib ("modifying idmap pagetable domain %p", domain)); 509257251Skib KASSERT((base & DMAR_PAGE_MASK) == 0, 510284869Skib ("non-aligned base %p %jx %jx", domain, (uintmax_t)base, 511257251Skib (uintmax_t)size)); 512257251Skib KASSERT((size & DMAR_PAGE_MASK) == 0, 513284869Skib ("non-aligned size %p %jx %jx", domain, (uintmax_t)base, 514257251Skib (uintmax_t)size)); 515284869Skib KASSERT(size > 0, ("zero size %p %jx %jx", domain, (uintmax_t)base, 516257251Skib (uintmax_t)size)); 517284869Skib KASSERT(base < (1ULL << domain->agaw), 518284869Skib ("base too high %p %jx %jx agaw %d", domain, (uintmax_t)base, 519284869Skib (uintmax_t)size, domain->agaw)); 520284869Skib KASSERT(base + size < (1ULL << domain->agaw), 521284869Skib ("end too high %p %jx %jx agaw %d", domain, (uintmax_t)base, 522284869Skib (uintmax_t)size, domain->agaw)); 523257251Skib KASSERT(base + size > base, 524284869Skib ("size overflow %p %jx %jx", domain, (uintmax_t)base, 525257251Skib (uintmax_t)size)); 526257251Skib KASSERT((pflags & (DMAR_PTE_R | DMAR_PTE_W)) != 0, 527257251Skib ("neither read nor write %jx", (uintmax_t)pflags)); 528257251Skib KASSERT((pflags & ~(DMAR_PTE_R | DMAR_PTE_W | DMAR_PTE_SNP | 529257251Skib DMAR_PTE_TM)) == 0, 530257251Skib ("invalid pte flags %jx", (uintmax_t)pflags)); 531257251Skib KASSERT((pflags & 
DMAR_PTE_SNP) == 0 || 532257512Skib (unit->hw_ecap & DMAR_ECAP_SC) != 0, 533257251Skib ("PTE_SNP for dmar without snoop control %p %jx", 534284869Skib domain, (uintmax_t)pflags)); 535257251Skib KASSERT((pflags & DMAR_PTE_TM) == 0 || 536257512Skib (unit->hw_ecap & DMAR_ECAP_DI) != 0, 537257251Skib ("PTE_TM for dmar without DIOTLB %p %jx", 538284869Skib domain, (uintmax_t)pflags)); 539257251Skib KASSERT((flags & ~DMAR_PGF_WAITOK) == 0, ("invalid flags %x", flags)); 540257251Skib 541284869Skib DMAR_DOMAIN_PGLOCK(domain); 542284869Skib error = domain_map_buf_locked(domain, base, size, ma, pflags, flags); 543284869Skib DMAR_DOMAIN_PGUNLOCK(domain); 544257512Skib if (error != 0) 545257512Skib return (error); 546257512Skib 547257512Skib if ((unit->hw_cap & DMAR_CAP_CM) != 0) 548284869Skib domain_flush_iotlb_sync(domain, base, size); 549257512Skib else if ((unit->hw_cap & DMAR_CAP_RWBF) != 0) { 550257512Skib /* See 11.1 Write Buffer Flushing. */ 551257512Skib DMAR_LOCK(unit); 552257512Skib dmar_flush_write_bufs(unit); 553257512Skib DMAR_UNLOCK(unit); 554257512Skib } 555257512Skib return (0); 556257251Skib} 557257251Skib 558284869Skibstatic void domain_unmap_clear_pte(struct dmar_domain *domain, 559284869Skib dmar_gaddr_t base, int lvl, int flags, dmar_pte_t *pte, 560284869Skib struct sf_buf **sf, bool free_fs); 561257251Skib 562257251Skibstatic void 563284869Skibdomain_free_pgtbl_pde(struct dmar_domain *domain, dmar_gaddr_t base, 564284869Skib int lvl, int flags) 565257251Skib{ 566257251Skib struct sf_buf *sf; 567257251Skib dmar_pte_t *pde; 568257251Skib vm_pindex_t idx; 569257251Skib 570257251Skib sf = NULL; 571284869Skib pde = domain_pgtbl_map_pte(domain, base, lvl, flags, &idx, &sf); 572284869Skib domain_unmap_clear_pte(domain, base, lvl, flags, pde, &sf, true); 573257251Skib} 574257251Skib 575257251Skibstatic void 576284869Skibdomain_unmap_clear_pte(struct dmar_domain *domain, dmar_gaddr_t base, int lvl, 577257251Skib int flags, dmar_pte_t *pte, struct sf_buf **sf, 
bool free_sf) 578257251Skib{ 579257251Skib vm_page_t m; 580257251Skib 581257251Skib dmar_pte_clear(&pte->pte); 582284869Skib dmar_flush_pte_to_ram(domain->dmar, pte); 583257251Skib m = sf_buf_page(*sf); 584257251Skib if (free_sf) { 585277023Skib dmar_unmap_pgtbl(*sf); 586257251Skib *sf = NULL; 587257251Skib } 588257251Skib m->wire_count--; 589257251Skib if (m->wire_count != 0) 590257251Skib return; 591257251Skib KASSERT(lvl != 0, 592284869Skib ("lost reference (lvl) on root pg domain %p base %jx lvl %d", 593284869Skib domain, (uintmax_t)base, lvl)); 594257251Skib KASSERT(m->pindex != 0, 595284869Skib ("lost reference (idx) on root pg domain %p base %jx lvl %d", 596284869Skib domain, (uintmax_t)base, lvl)); 597284869Skib dmar_pgfree(domain->pgtbl_obj, m->pindex, flags); 598284869Skib domain_free_pgtbl_pde(domain, base, lvl - 1, flags); 599257251Skib} 600257251Skib 601257251Skib/* 602257251Skib * Assumes that the unmap is never partial. 603257251Skib */ 604257251Skibstatic int 605284869Skibdomain_unmap_buf_locked(struct dmar_domain *domain, dmar_gaddr_t base, 606257251Skib dmar_gaddr_t size, int flags) 607257251Skib{ 608257251Skib dmar_pte_t *pte; 609257251Skib struct sf_buf *sf; 610257251Skib vm_pindex_t idx; 611283735Skib dmar_gaddr_t pg_sz; 612257251Skib int lvl; 613257251Skib 614284869Skib DMAR_DOMAIN_ASSERT_PGLOCKED(domain); 615257251Skib if (size == 0) 616257251Skib return (0); 617257251Skib 618284869Skib KASSERT((domain->flags & DMAR_DOMAIN_IDMAP) == 0, 619284869Skib ("modifying idmap pagetable domain %p", domain)); 620257251Skib KASSERT((base & DMAR_PAGE_MASK) == 0, 621284869Skib ("non-aligned base %p %jx %jx", domain, (uintmax_t)base, 622257251Skib (uintmax_t)size)); 623257251Skib KASSERT((size & DMAR_PAGE_MASK) == 0, 624284869Skib ("non-aligned size %p %jx %jx", domain, (uintmax_t)base, 625257251Skib (uintmax_t)size)); 626284869Skib KASSERT(base < (1ULL << domain->agaw), 627284869Skib ("base too high %p %jx %jx agaw %d", domain, (uintmax_t)base, 
628284869Skib (uintmax_t)size, domain->agaw)); 629284869Skib KASSERT(base + size < (1ULL << domain->agaw), 630284869Skib ("end too high %p %jx %jx agaw %d", domain, (uintmax_t)base, 631284869Skib (uintmax_t)size, domain->agaw)); 632257251Skib KASSERT(base + size > base, 633284869Skib ("size overflow %p %jx %jx", domain, (uintmax_t)base, 634257251Skib (uintmax_t)size)); 635257251Skib KASSERT((flags & ~DMAR_PGF_WAITOK) == 0, ("invalid flags %x", flags)); 636257251Skib 637257900Sdim pg_sz = 0; /* silence gcc */ 638257251Skib flags |= DMAR_PGF_OBJL; 639257251Skib TD_PREP_PINNED_ASSERT; 640257251Skib 641257251Skib for (sf = NULL; size > 0; base += pg_sz, size -= pg_sz) { 642284869Skib for (lvl = 0; lvl < domain->pglvl; lvl++) { 643284869Skib if (lvl != domain->pglvl - 1 && 644284869Skib !domain_is_sp_lvl(domain, lvl)) 645257251Skib continue; 646284869Skib pg_sz = domain_page_size(domain, lvl); 647257251Skib if (pg_sz > size) 648257251Skib continue; 649284869Skib pte = domain_pgtbl_map_pte(domain, base, lvl, flags, 650257251Skib &idx, &sf); 651257251Skib KASSERT(pte != NULL, 652257251Skib ("sleeping or page missed %p %jx %d 0x%x", 653284869Skib domain, (uintmax_t)base, lvl, flags)); 654257251Skib if ((pte->pte & DMAR_PTE_SP) != 0 || 655284869Skib lvl == domain->pglvl - 1) { 656284869Skib domain_unmap_clear_pte(domain, base, lvl, 657284869Skib flags, pte, &sf, false); 658257251Skib break; 659257251Skib } 660257251Skib } 661257251Skib KASSERT(size >= pg_sz, 662284869Skib ("unmapping loop overflow %p %jx %jx %jx", domain, 663257251Skib (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz)); 664257251Skib } 665257251Skib if (sf != NULL) 666277023Skib dmar_unmap_pgtbl(sf); 667257251Skib /* 668257251Skib * See 11.1 Write Buffer Flushing for an explanation why RWBF 669257251Skib * can be ignored there. 
670257251Skib */ 671257251Skib 672257251Skib TD_PINNED_ASSERT; 673257251Skib return (0); 674257251Skib} 675257251Skib 676257251Skibint 677284869Skibdomain_unmap_buf(struct dmar_domain *domain, dmar_gaddr_t base, 678284869Skib dmar_gaddr_t size, int flags) 679257251Skib{ 680257512Skib int error; 681257251Skib 682284869Skib DMAR_DOMAIN_PGLOCK(domain); 683284869Skib error = domain_unmap_buf_locked(domain, base, size, flags); 684284869Skib DMAR_DOMAIN_PGUNLOCK(domain); 685257512Skib return (error); 686257251Skib} 687257251Skib 688257251Skibint 689284869Skibdomain_alloc_pgtbl(struct dmar_domain *domain) 690257251Skib{ 691257251Skib vm_page_t m; 692257251Skib 693284869Skib KASSERT(domain->pgtbl_obj == NULL, 694284869Skib ("already initialized %p", domain)); 695257251Skib 696284869Skib domain->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL, 697284869Skib IDX_TO_OFF(pglvl_max_pages(domain->pglvl)), 0, 0, NULL); 698284869Skib DMAR_DOMAIN_PGLOCK(domain); 699284869Skib m = dmar_pgalloc(domain->pgtbl_obj, 0, DMAR_PGF_WAITOK | 700257251Skib DMAR_PGF_ZERO | DMAR_PGF_OBJL); 701257251Skib /* No implicit free of the top level page table page. 
*/ 702257251Skib m->wire_count = 1; 703284869Skib DMAR_DOMAIN_PGUNLOCK(domain); 704284869Skib DMAR_LOCK(domain->dmar); 705284869Skib domain->flags |= DMAR_DOMAIN_PGTBL_INITED; 706284869Skib DMAR_UNLOCK(domain->dmar); 707257251Skib return (0); 708257251Skib} 709257251Skib 710257251Skibvoid 711284869Skibdomain_free_pgtbl(struct dmar_domain *domain) 712257251Skib{ 713257251Skib vm_object_t obj; 714257251Skib vm_page_t m; 715257251Skib 716284869Skib obj = domain->pgtbl_obj; 717257251Skib if (obj == NULL) { 718284869Skib KASSERT((domain->dmar->hw_ecap & DMAR_ECAP_PT) != 0 && 719284869Skib (domain->flags & DMAR_DOMAIN_IDMAP) != 0, 720284869Skib ("lost pagetable object domain %p", domain)); 721257251Skib return; 722257251Skib } 723284869Skib DMAR_DOMAIN_ASSERT_PGLOCKED(domain); 724284869Skib domain->pgtbl_obj = NULL; 725257251Skib 726284869Skib if ((domain->flags & DMAR_DOMAIN_IDMAP) != 0) { 727257251Skib put_idmap_pgtbl(obj); 728284869Skib domain->flags &= ~DMAR_DOMAIN_IDMAP; 729257251Skib return; 730257251Skib } 731257251Skib 732257251Skib /* Obliterate wire_counts */ 733257251Skib VM_OBJECT_ASSERT_WLOCKED(obj); 734257251Skib for (m = vm_page_lookup(obj, 0); m != NULL; m = vm_page_next(m)) 735257251Skib m->wire_count = 0; 736257251Skib VM_OBJECT_WUNLOCK(obj); 737257251Skib vm_object_deallocate(obj); 738257251Skib} 739257251Skib 740257251Skibstatic inline uint64_t 741284869Skibdomain_wait_iotlb_flush(struct dmar_unit *unit, uint64_t wt, int iro) 742257251Skib{ 743257251Skib uint64_t iotlbr; 744257251Skib 745257251Skib dmar_write8(unit, iro + DMAR_IOTLB_REG_OFF, DMAR_IOTLB_IVT | 746257251Skib DMAR_IOTLB_DR | DMAR_IOTLB_DW | wt); 747257251Skib for (;;) { 748257251Skib iotlbr = dmar_read8(unit, iro + DMAR_IOTLB_REG_OFF); 749257251Skib if ((iotlbr & DMAR_IOTLB_IVT) == 0) 750257251Skib break; 751257251Skib cpu_spinwait(); 752257251Skib } 753257251Skib return (iotlbr); 754257251Skib} 755257251Skib 756257512Skibvoid 757284869Skibdomain_flush_iotlb_sync(struct dmar_domain 
*domain, dmar_gaddr_t base, 758284869Skib dmar_gaddr_t size) 759257251Skib{ 760257251Skib struct dmar_unit *unit; 761257251Skib dmar_gaddr_t isize; 762257251Skib uint64_t iotlbr; 763257251Skib int am, iro; 764257251Skib 765284869Skib unit = domain->dmar; 766257512Skib KASSERT(!unit->qi_enabled, ("dmar%d: sync iotlb flush call", 767257512Skib unit->unit)); 768257251Skib iro = DMAR_ECAP_IRO(unit->hw_ecap) * 16; 769257251Skib DMAR_LOCK(unit); 770257251Skib if ((unit->hw_cap & DMAR_CAP_PSI) == 0 || size > 2 * 1024 * 1024) { 771284869Skib iotlbr = domain_wait_iotlb_flush(unit, DMAR_IOTLB_IIRG_DOM | 772284869Skib DMAR_IOTLB_DID(domain->domain), iro); 773257251Skib KASSERT((iotlbr & DMAR_IOTLB_IAIG_MASK) != 774257251Skib DMAR_IOTLB_IAIG_INVLD, 775257251Skib ("dmar%d: invalidation failed %jx", unit->unit, 776257251Skib (uintmax_t)iotlbr)); 777257251Skib } else { 778257251Skib for (; size > 0; base += isize, size -= isize) { 779257512Skib am = calc_am(unit, base, size, &isize); 780257251Skib dmar_write8(unit, iro, base | am); 781284869Skib iotlbr = domain_wait_iotlb_flush(unit, 782284869Skib DMAR_IOTLB_IIRG_PAGE | 783284869Skib DMAR_IOTLB_DID(domain->domain), iro); 784257251Skib KASSERT((iotlbr & DMAR_IOTLB_IAIG_MASK) != 785257251Skib DMAR_IOTLB_IAIG_INVLD, 786257251Skib ("dmar%d: PSI invalidation failed " 787257251Skib "iotlbr 0x%jx base 0x%jx size 0x%jx am %d", 788257251Skib unit->unit, (uintmax_t)iotlbr, 789257251Skib (uintmax_t)base, (uintmax_t)size, am)); 790257251Skib /* 791257251Skib * Any non-page granularity covers whole guest 792257251Skib * address space for the domain. 793257251Skib */ 794257251Skib if ((iotlbr & DMAR_IOTLB_IAIG_MASK) != 795257251Skib DMAR_IOTLB_IAIG_PAGE) 796257251Skib break; 797257251Skib } 798257251Skib } 799257251Skib DMAR_UNLOCK(unit); 800257251Skib} 801