1257251Skib/*- 2257251Skib * Copyright (c) 2013 The FreeBSD Foundation 3257251Skib * All rights reserved. 4257251Skib * 5257251Skib * This software was developed by Konstantin Belousov <kib@FreeBSD.org> 6257251Skib * under sponsorship from the FreeBSD Foundation. 7257251Skib * 8257251Skib * Redistribution and use in source and binary forms, with or without 9257251Skib * modification, are permitted provided that the following conditions 10257251Skib * are met: 11257251Skib * 1. Redistributions of source code must retain the above copyright 12257251Skib * notice, this list of conditions and the following disclaimer. 13257251Skib * 2. Redistributions in binary form must reproduce the above copyright 14257251Skib * notice, this list of conditions and the following disclaimer in the 15257251Skib * documentation and/or other materials provided with the distribution. 16257251Skib * 17257251Skib * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18257251Skib * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19257251Skib * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20257251Skib * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21257251Skib * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22257251Skib * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23257251Skib * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24257251Skib * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25257251Skib * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26257251Skib * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27257251Skib * SUCH DAMAGE. 28257251Skib */ 29257251Skib 30257251Skib#include <sys/cdefs.h> 31257251Skib__FBSDID("$FreeBSD$"); 32257251Skib 33257251Skib#include <sys/param.h> 34257251Skib#include <sys/systm.h> 35257251Skib#include <sys/malloc.h> 36257251Skib#include <sys/bus.h> 37257251Skib#include <sys/interrupt.h> 38257251Skib#include <sys/kernel.h> 39257251Skib#include <sys/ktr.h> 40257251Skib#include <sys/lock.h> 41257251Skib#include <sys/memdesc.h> 42257251Skib#include <sys/mutex.h> 43257251Skib#include <sys/proc.h> 44257251Skib#include <sys/rwlock.h> 45257251Skib#include <sys/rman.h> 46257251Skib#include <sys/sf_buf.h> 47257251Skib#include <sys/sysctl.h> 48257251Skib#include <sys/taskqueue.h> 49257251Skib#include <sys/tree.h> 50257251Skib#include <sys/uio.h> 51257251Skib#include <vm/vm.h> 52257251Skib#include <vm/vm_extern.h> 53257251Skib#include <vm/vm_kern.h> 54257251Skib#include <vm/vm_object.h> 55257251Skib#include <vm/vm_page.h> 56257251Skib#include <vm/vm_pager.h> 57257251Skib#include <vm/vm_map.h> 58257251Skib#include <machine/atomic.h> 59257251Skib#include <machine/bus.h> 60257251Skib#include <machine/cpu.h> 61257251Skib#include <machine/md_var.h> 62257251Skib#include <machine/specialreg.h> 63257251Skib#include <x86/include/busdma_impl.h> 64257251Skib#include <x86/iommu/intel_reg.h> 65257251Skib#include <x86/iommu/busdma_dmar.h> 66257251Skib#include <x86/iommu/intel_dmar.h> 67257251Skib 68257251Skibstatic int ctx_unmap_buf_locked(struct dmar_ctx *ctx, dmar_gaddr_t base, 69257251Skib dmar_gaddr_t size, int flags); 70257251Skib 71257251Skib/* 72257251Skib * The cache of the identity mapping page tables for the DMARs. Using 73257251Skib * the cache saves significant amount of memory for page tables by 74257251Skib * reusing the page tables, since usually DMARs are identical and have 75257251Skib * the same capabilities. Still, cache records the information needed 76257251Skib * to match DMAR capabilities and page table format, to correctly 77257251Skib * handle different DMARs. 78257251Skib */ 79257251Skib 80257251Skibstruct idpgtbl { 81257251Skib dmar_gaddr_t maxaddr; /* Page table covers the guest address 82257251Skib range [0..maxaddr) */ 83257251Skib int pglvl; /* Total page table levels ignoring 84257251Skib superpages */ 85257251Skib int leaf; /* The last materialized page table 86257251Skib level, it is non-zero if superpages 87257251Skib are supported */ 88257251Skib vm_object_t pgtbl_obj; /* The page table pages */ 89257251Skib LIST_ENTRY(idpgtbl) link; 90257251Skib}; 91257251Skib 92257251Skibstatic struct sx idpgtbl_lock; 93257251SkibSX_SYSINIT(idpgtbl, &idpgtbl_lock, "idpgtbl"); 94257251Skibstatic LIST_HEAD(, idpgtbl) idpgtbls = LIST_HEAD_INITIALIZER(idpgtbls); 95257251Skibstatic MALLOC_DEFINE(M_DMAR_IDPGTBL, "dmar_idpgtbl", 96257251Skib "Intel DMAR Identity mappings cache elements"); 97257251Skib 98257251Skib/* 99257251Skib * Build the next level of the page tables for the identity mapping. 100257251Skib * - lvl is the level to build; 101257251Skib * - idx is the index of the page table page in the pgtbl_obj, which is 102257251Skib * being allocated filled now; 103257251Skib * - addr is the starting address in the bus address space which is 104257251Skib * mapped by the page table page. 105257251Skib */ 106257251Skibstatic void 107257251Skibctx_idmap_nextlvl(struct idpgtbl *tbl, int lvl, vm_pindex_t idx, 108257251Skib dmar_gaddr_t addr) 109257251Skib{ 110284021Skib vm_page_t m1; 111257251Skib dmar_pte_t *pte; 112257251Skib struct sf_buf *sf; 113257251Skib dmar_gaddr_t f, pg_sz; 114257251Skib vm_pindex_t base; 115257251Skib int i; 116257251Skib 117257251Skib VM_OBJECT_ASSERT_LOCKED(tbl->pgtbl_obj); 118257251Skib if (addr >= tbl->maxaddr) 119257251Skib return; 120284021Skib (void)dmar_pgalloc(tbl->pgtbl_obj, idx, DMAR_PGF_OBJL | DMAR_PGF_WAITOK | 121257251Skib DMAR_PGF_ZERO); 122257251Skib base = idx * DMAR_NPTEPG + 1; /* Index of the first child page of idx */ 123257251Skib pg_sz = pglvl_page_size(tbl->pglvl, lvl); 124257251Skib if (lvl != tbl->leaf) { 125257251Skib for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) 126257251Skib ctx_idmap_nextlvl(tbl, lvl + 1, base + i, f); 127257251Skib } 128257251Skib VM_OBJECT_WUNLOCK(tbl->pgtbl_obj); 129257251Skib pte = dmar_map_pgtbl(tbl->pgtbl_obj, idx, DMAR_PGF_WAITOK, &sf); 130257251Skib if (lvl == tbl->leaf) { 131257251Skib for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) { 132257251Skib if (f >= tbl->maxaddr) 133257251Skib break; 134257251Skib pte[i].pte = (DMAR_PTE_ADDR_MASK & f) | 135257251Skib DMAR_PTE_R | DMAR_PTE_W; 136257251Skib } 137257251Skib } else { 138257251Skib for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) { 139257251Skib if (f >= tbl->maxaddr) 140257251Skib break; 141257251Skib m1 = dmar_pgalloc(tbl->pgtbl_obj, base + i, 142257251Skib DMAR_PGF_NOALLOC); 143257251Skib KASSERT(m1 != NULL, ("lost page table page")); 144257251Skib pte[i].pte = (DMAR_PTE_ADDR_MASK & 145257251Skib VM_PAGE_TO_PHYS(m1)) | DMAR_PTE_R | DMAR_PTE_W; 146257251Skib } 147257251Skib } 148257251Skib /* ctx_get_idmap_pgtbl flushes CPU cache if needed. */ 149277315Skib dmar_unmap_pgtbl(sf); 150257251Skib VM_OBJECT_WLOCK(tbl->pgtbl_obj); 151257251Skib} 152257251Skib 153257251Skib/* 154257251Skib * Find a ready and compatible identity-mapping page table in the 155257251Skib * cache. If not found, populate the identity-mapping page table for 156257251Skib * the context, up to the maxaddr. The maxaddr byte is allowed to be 157257251Skib * not mapped, which is aligned with the definition of Maxmem as the 158257251Skib * highest usable physical address + 1. If superpages are used, the 159257251Skib * maxaddr is typically mapped. 160257251Skib */ 161257251Skibvm_object_t 162257251Skibctx_get_idmap_pgtbl(struct dmar_ctx *ctx, dmar_gaddr_t maxaddr) 163257251Skib{ 164257251Skib struct dmar_unit *unit; 165257251Skib struct idpgtbl *tbl; 166257251Skib vm_object_t res; 167257251Skib vm_page_t m; 168257251Skib int leaf, i; 169257251Skib 170259512Skib leaf = 0; /* silence gcc */ 171259512Skib 172257251Skib /* 173257251Skib * First, determine where to stop the paging structures. 174257251Skib */ 175257251Skib for (i = 0; i < ctx->pglvl; i++) { 176257251Skib if (i == ctx->pglvl - 1 || ctx_is_sp_lvl(ctx, i)) { 177257251Skib leaf = i; 178257251Skib break; 179257251Skib } 180257251Skib } 181257251Skib 182257251Skib /* 183257251Skib * Search the cache for a compatible page table. Qualified 184257251Skib * page table must map up to maxaddr, its level must be 185257251Skib * supported by the DMAR and leaf should be equal to the 186257251Skib * calculated value. The later restriction could be lifted 187257251Skib * but I believe it is currently impossible to have any 188257251Skib * deviations for existing hardware. 189257251Skib */ 190257251Skib sx_slock(&idpgtbl_lock); 191257251Skib LIST_FOREACH(tbl, &idpgtbls, link) { 192257251Skib if (tbl->maxaddr >= maxaddr && 193257251Skib dmar_pglvl_supported(ctx->dmar, tbl->pglvl) && 194257251Skib tbl->leaf == leaf) { 195257251Skib res = tbl->pgtbl_obj; 196257251Skib vm_object_reference(res); 197257251Skib sx_sunlock(&idpgtbl_lock); 198257251Skib ctx->pglvl = tbl->pglvl; /* XXXKIB ? */ 199257251Skib goto end; 200257251Skib } 201257251Skib } 202257251Skib 203257251Skib /* 204257251Skib * Not found in cache, relock the cache into exclusive mode to 205257251Skib * be able to add element, and recheck cache again after the 206257251Skib * relock. 207257251Skib */ 208257251Skib sx_sunlock(&idpgtbl_lock); 209257251Skib sx_xlock(&idpgtbl_lock); 210257251Skib LIST_FOREACH(tbl, &idpgtbls, link) { 211257251Skib if (tbl->maxaddr >= maxaddr && 212257251Skib dmar_pglvl_supported(ctx->dmar, tbl->pglvl) && 213257251Skib tbl->leaf == leaf) { 214257251Skib res = tbl->pgtbl_obj; 215257251Skib vm_object_reference(res); 216257251Skib sx_xunlock(&idpgtbl_lock); 217257251Skib ctx->pglvl = tbl->pglvl; /* XXXKIB ? */ 218257251Skib return (res); 219257251Skib } 220257251Skib } 221257251Skib 222257251Skib /* 223257251Skib * Still not found, create new page table. 224257251Skib */ 225257251Skib tbl = malloc(sizeof(*tbl), M_DMAR_IDPGTBL, M_WAITOK); 226257251Skib tbl->pglvl = ctx->pglvl; 227257251Skib tbl->leaf = leaf; 228257251Skib tbl->maxaddr = maxaddr; 229257251Skib tbl->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL, 230257251Skib IDX_TO_OFF(pglvl_max_pages(tbl->pglvl)), 0, 0, NULL); 231257251Skib VM_OBJECT_WLOCK(tbl->pgtbl_obj); 232257251Skib ctx_idmap_nextlvl(tbl, 0, 0, 0); 233257251Skib VM_OBJECT_WUNLOCK(tbl->pgtbl_obj); 234257251Skib LIST_INSERT_HEAD(&idpgtbls, tbl, link); 235257251Skib res = tbl->pgtbl_obj; 236257251Skib vm_object_reference(res); 237257251Skib sx_xunlock(&idpgtbl_lock); 238257251Skib 239257251Skibend: 240257251Skib /* 241257251Skib * Table was found or created. 242257251Skib * 243257251Skib * If DMAR does not snoop paging structures accesses, flush 244257251Skib * CPU cache to memory. Note that dmar_unmap_pgtbl() coherent 245257251Skib * argument was possibly invalid at the time of the identity 246257251Skib * page table creation, since DMAR which was passed at the 247257251Skib * time of creation could be coherent, while current DMAR is 248257251Skib * not. 249257251Skib * 250257251Skib * If DMAR cannot look into the chipset write buffer, flush it 251257251Skib * as well. 252257251Skib */ 253257251Skib unit = ctx->dmar; 254257251Skib if (!DMAR_IS_COHERENT(unit)) { 255257251Skib VM_OBJECT_WLOCK(res); 256257251Skib for (m = vm_page_lookup(res, 0); m != NULL; 257257251Skib m = vm_page_next(m)) 258257251Skib pmap_invalidate_cache_pages(&m, 1); 259257251Skib VM_OBJECT_WUNLOCK(res); 260257251Skib } 261257251Skib if ((unit->hw_cap & DMAR_CAP_RWBF) != 0) { 262257251Skib DMAR_LOCK(unit); 263257251Skib dmar_flush_write_bufs(unit); 264257251Skib DMAR_UNLOCK(unit); 265257251Skib } 266257251Skib 267257251Skib return (res); 268257251Skib} 269257251Skib 270257251Skib/* 271257251Skib * Return a reference to the identity mapping page table to the cache. 272257251Skib */ 273257251Skibvoid 274257251Skibput_idmap_pgtbl(vm_object_t obj) 275257251Skib{ 276257251Skib struct idpgtbl *tbl, *tbl1; 277257251Skib vm_object_t rmobj; 278257251Skib 279257251Skib sx_slock(&idpgtbl_lock); 280257251Skib KASSERT(obj->ref_count >= 2, ("lost cache reference")); 281257251Skib vm_object_deallocate(obj); 282257251Skib 283257251Skib /* 284257251Skib * Cache always owns one last reference on the page table object. 285257251Skib * If there is an additional reference, object must stay. 286257251Skib */ 287257251Skib if (obj->ref_count > 1) { 288257251Skib sx_sunlock(&idpgtbl_lock); 289257251Skib return; 290257251Skib } 291257251Skib 292257251Skib /* 293257251Skib * Cache reference is the last, remove cache element and free 294257251Skib * page table object, returning the page table pages to the 295257251Skib * system. 296257251Skib */ 297257251Skib sx_sunlock(&idpgtbl_lock); 298257251Skib sx_xlock(&idpgtbl_lock); 299257251Skib LIST_FOREACH_SAFE(tbl, &idpgtbls, link, tbl1) { 300257251Skib rmobj = tbl->pgtbl_obj; 301257251Skib if (rmobj->ref_count == 1) { 302257251Skib LIST_REMOVE(tbl, link); 303257251Skib atomic_subtract_int(&dmar_tbl_pagecnt, 304257251Skib rmobj->resident_page_count); 305257251Skib vm_object_deallocate(rmobj); 306257251Skib free(tbl, M_DMAR_IDPGTBL); 307257251Skib } 308257251Skib } 309257251Skib sx_xunlock(&idpgtbl_lock); 310257251Skib} 311257251Skib 312257251Skib/* 313257251Skib * The core routines to map and unmap host pages at the given guest 314257251Skib * address. Support superpages. 315257251Skib */ 316257251Skib 317257251Skib/* 318257251Skib * Index of the pte for the guest address base in the page table at 319257251Skib * the level lvl. 320257251Skib */ 321257251Skibstatic int 322257251Skibctx_pgtbl_pte_off(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl) 323257251Skib{ 324257251Skib 325257251Skib base >>= DMAR_PAGE_SHIFT + (ctx->pglvl - lvl - 1) * DMAR_NPTEPGSHIFT; 326257251Skib return (base & DMAR_PTEMASK); 327257251Skib} 328257251Skib 329257251Skib/* 330257251Skib * Returns the page index of the page table page in the page table 331257251Skib * object, which maps the given address base at the page table level 332257251Skib * lvl. 333257251Skib */ 334257251Skibstatic vm_pindex_t 335257251Skibctx_pgtbl_get_pindex(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl) 336257251Skib{ 337257251Skib vm_pindex_t idx, pidx; 338257251Skib int i; 339257251Skib 340257251Skib KASSERT(lvl >= 0 && lvl < ctx->pglvl, ("wrong lvl %p %d", ctx, lvl)); 341257251Skib 342257251Skib for (pidx = idx = 0, i = 0; i < lvl; i++, pidx = idx) 343257251Skib idx = ctx_pgtbl_pte_off(ctx, base, i) + pidx * DMAR_NPTEPG + 1; 344257251Skib return (idx); 345257251Skib} 346257251Skib 347257251Skibstatic dmar_pte_t * 348257251Skibctx_pgtbl_map_pte(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl, int flags, 349257251Skib vm_pindex_t *idxp, struct sf_buf **sf) 350257251Skib{ 351257251Skib vm_page_t m; 352257251Skib struct sf_buf *sfp; 353257251Skib dmar_pte_t *pte, *ptep; 354257251Skib vm_pindex_t idx, idx1; 355257251Skib 356257251Skib DMAR_CTX_ASSERT_PGLOCKED(ctx); 357257251Skib KASSERT((flags & DMAR_PGF_OBJL) != 0, ("lost PGF_OBJL")); 358257251Skib 359257251Skib idx = ctx_pgtbl_get_pindex(ctx, base, lvl); 360257251Skib if (*sf != NULL && idx == *idxp) { 361257251Skib pte = (dmar_pte_t *)sf_buf_kva(*sf); 362257251Skib } else { 363257251Skib if (*sf != NULL) 364277315Skib dmar_unmap_pgtbl(*sf); 365257251Skib *idxp = idx; 366257251Skibretry: 367257251Skib pte = dmar_map_pgtbl(ctx->pgtbl_obj, idx, flags, sf); 368257251Skib if (pte == NULL) { 369257251Skib KASSERT(lvl > 0, ("lost root page table page %p", ctx)); 370257251Skib /* 371286854Skib * Page table page does not exist, allocate 372286854Skib * it and create a pte in the preceeding page level 373286854Skib * to reference the allocated page table page. 374257251Skib */ 375257251Skib m = dmar_pgalloc(ctx->pgtbl_obj, idx, flags | 376257251Skib DMAR_PGF_ZERO); 377257251Skib if (m == NULL) 378257251Skib return (NULL); 379257251Skib 380257251Skib /* 381257251Skib * Prevent potential free while pgtbl_obj is 382257251Skib * unlocked in the recursive call to 383257251Skib * ctx_pgtbl_map_pte(), if other thread did 384257251Skib * pte write and clean while the lock if 385257251Skib * dropped. 386257251Skib */ 387257251Skib m->wire_count++; 388257251Skib 389257251Skib sfp = NULL; 390257251Skib ptep = ctx_pgtbl_map_pte(ctx, base, lvl - 1, flags, 391257251Skib &idx1, &sfp); 392257251Skib if (ptep == NULL) { 393257251Skib KASSERT(m->pindex != 0, 394257251Skib ("loosing root page %p", ctx)); 395257251Skib m->wire_count--; 396257251Skib dmar_pgfree(ctx->pgtbl_obj, m->pindex, flags); 397257251Skib return (NULL); 398257251Skib } 399257251Skib dmar_pte_store(&ptep->pte, DMAR_PTE_R | DMAR_PTE_W | 400257251Skib VM_PAGE_TO_PHYS(m)); 401277315Skib dmar_flush_pte_to_ram(ctx->dmar, ptep); 402257251Skib sf_buf_page(sfp)->wire_count += 1; 403257251Skib m->wire_count--; 404277315Skib dmar_unmap_pgtbl(sfp); 405257251Skib /* Only executed once. */ 406257251Skib goto retry; 407257251Skib } 408257251Skib } 409257251Skib pte += ctx_pgtbl_pte_off(ctx, base, lvl); 410257251Skib return (pte); 411257251Skib} 412257251Skib 413257251Skibstatic int 414257251Skibctx_map_buf_locked(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size, 415257251Skib vm_page_t *ma, uint64_t pflags, int flags) 416257251Skib{ 417257251Skib dmar_pte_t *pte; 418257251Skib struct sf_buf *sf; 419257251Skib dmar_gaddr_t pg_sz, base1, size1; 420257251Skib vm_pindex_t pi, c, idx, run_sz; 421257251Skib int lvl; 422257251Skib bool superpage; 423257251Skib 424257251Skib DMAR_CTX_ASSERT_PGLOCKED(ctx); 425257251Skib 426257251Skib base1 = base; 427257251Skib size1 = size; 428257251Skib flags |= DMAR_PGF_OBJL; 429257251Skib TD_PREP_PINNED_ASSERT; 430257251Skib 431257251Skib for (sf = NULL, pi = 0; size > 0; base += pg_sz, size -= pg_sz, 432257251Skib pi += run_sz) { 433257251Skib for (lvl = 0, c = 0, superpage = false;; lvl++) { 434257251Skib pg_sz = ctx_page_size(ctx, lvl); 435257251Skib run_sz = pg_sz >> DMAR_PAGE_SHIFT; 436257251Skib if (lvl == ctx->pglvl - 1) 437257251Skib break; 438257251Skib /* 439257251Skib * Check if the current base suitable for the 440257251Skib * superpage mapping. First, verify the level. 441257251Skib */ 442257251Skib if (!ctx_is_sp_lvl(ctx, lvl)) 443257251Skib continue; 444257251Skib /* 445257251Skib * Next, look at the size of the mapping and 446257251Skib * alignment of both guest and host addresses. 447257251Skib */ 448257251Skib if (size < pg_sz || (base & (pg_sz - 1)) != 0 || 449257251Skib (VM_PAGE_TO_PHYS(ma[pi]) & (pg_sz - 1)) != 0) 450257251Skib continue; 451257251Skib /* All passed, check host pages contiguouty. */ 452257251Skib if (c == 0) { 453257251Skib for (c = 1; c < run_sz; c++) { 454257251Skib if (VM_PAGE_TO_PHYS(ma[pi + c]) != 455257251Skib VM_PAGE_TO_PHYS(ma[pi + c - 1]) + 456257251Skib PAGE_SIZE) 457257251Skib break; 458257251Skib } 459257251Skib } 460257251Skib if (c >= run_sz) { 461257251Skib superpage = true; 462257251Skib break; 463257251Skib } 464257251Skib } 465257251Skib KASSERT(size >= pg_sz, 466257251Skib ("mapping loop overflow %p %jx %jx %jx", ctx, 467257251Skib (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz)); 468280873Skib KASSERT(pg_sz > 0, ("pg_sz 0 lvl %d", lvl)); 469257251Skib pte = ctx_pgtbl_map_pte(ctx, base, lvl, flags, &idx, &sf); 470257251Skib if (pte == NULL) { 471257251Skib KASSERT((flags & DMAR_PGF_WAITOK) == 0, 472257251Skib ("failed waitable pte alloc %p", ctx)); 473277315Skib if (sf != NULL) 474277315Skib dmar_unmap_pgtbl(sf); 475257251Skib ctx_unmap_buf_locked(ctx, base1, base - base1, flags); 476257251Skib TD_PINNED_ASSERT; 477257251Skib return (ENOMEM); 478257251Skib } 479257251Skib dmar_pte_store(&pte->pte, VM_PAGE_TO_PHYS(ma[pi]) | pflags | 480257251Skib (superpage ? DMAR_PTE_SP : 0)); 481277315Skib dmar_flush_pte_to_ram(ctx->dmar, pte); 482257251Skib sf_buf_page(sf)->wire_count += 1; 483257251Skib } 484257251Skib if (sf != NULL) 485277315Skib dmar_unmap_pgtbl(sf); 486257251Skib TD_PINNED_ASSERT; 487257251Skib return (0); 488257251Skib} 489257251Skib 490257251Skibint 491257251Skibctx_map_buf(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size, 492257251Skib vm_page_t *ma, uint64_t pflags, int flags) 493257251Skib{ 494259512Skib struct dmar_unit *unit; 495259512Skib int error; 496257251Skib 497259512Skib unit = ctx->dmar; 498259512Skib 499257251Skib KASSERT((ctx->flags & DMAR_CTX_IDMAP) == 0, 500257251Skib ("modifying idmap pagetable ctx %p", ctx)); 501257251Skib KASSERT((base & DMAR_PAGE_MASK) == 0, 502257251Skib ("non-aligned base %p %jx %jx", ctx, (uintmax_t)base, 503257251Skib (uintmax_t)size)); 504257251Skib KASSERT((size & DMAR_PAGE_MASK) == 0, 505257251Skib ("non-aligned size %p %jx %jx", ctx, (uintmax_t)base, 506257251Skib (uintmax_t)size)); 507257251Skib KASSERT(size > 0, ("zero size %p %jx %jx", ctx, (uintmax_t)base, 508257251Skib (uintmax_t)size)); 509257251Skib KASSERT(base < (1ULL << ctx->agaw), 510257251Skib ("base too high %p %jx %jx agaw %d", ctx, (uintmax_t)base, 511257251Skib (uintmax_t)size, ctx->agaw)); 512257251Skib KASSERT(base + size < (1ULL << ctx->agaw), 513257251Skib ("end too high %p %jx %jx agaw %d", ctx, (uintmax_t)base, 514257251Skib (uintmax_t)size, ctx->agaw)); 515257251Skib KASSERT(base + size > base, 516257251Skib ("size overflow %p %jx %jx", ctx, (uintmax_t)base, 517257251Skib (uintmax_t)size)); 518257251Skib KASSERT((pflags & (DMAR_PTE_R | DMAR_PTE_W)) != 0, 519257251Skib ("neither read nor write %jx", (uintmax_t)pflags)); 520257251Skib KASSERT((pflags & ~(DMAR_PTE_R | DMAR_PTE_W | DMAR_PTE_SNP | 521257251Skib DMAR_PTE_TM)) == 0, 522257251Skib ("invalid pte flags %jx", (uintmax_t)pflags)); 523257251Skib KASSERT((pflags & DMAR_PTE_SNP) == 0 || 524259512Skib (unit->hw_ecap & DMAR_ECAP_SC) != 0, 525257251Skib ("PTE_SNP for dmar without snoop control %p %jx", 526257251Skib ctx, (uintmax_t)pflags)); 527257251Skib KASSERT((pflags & DMAR_PTE_TM) == 0 || 528259512Skib (unit->hw_ecap & DMAR_ECAP_DI) != 0, 529257251Skib ("PTE_TM for dmar without DIOTLB %p %jx", 530257251Skib ctx, (uintmax_t)pflags)); 531257251Skib KASSERT((flags & ~DMAR_PGF_WAITOK) == 0, ("invalid flags %x", flags)); 532257251Skib 533257251Skib DMAR_CTX_PGLOCK(ctx); 534259512Skib error = ctx_map_buf_locked(ctx, base, size, ma, pflags, flags); 535259512Skib DMAR_CTX_PGUNLOCK(ctx); 536259512Skib if (error != 0) 537259512Skib return (error); 538259512Skib 539259512Skib if ((unit->hw_cap & DMAR_CAP_CM) != 0) 540259512Skib ctx_flush_iotlb_sync(ctx, base, size); 541259512Skib else if ((unit->hw_cap & DMAR_CAP_RWBF) != 0) { 542259512Skib /* See 11.1 Write Buffer Flushing. */ 543259512Skib DMAR_LOCK(unit); 544259512Skib dmar_flush_write_bufs(unit); 545259512Skib DMAR_UNLOCK(unit); 546259512Skib } 547259512Skib return (0); 548257251Skib} 549257251Skib 550257251Skibstatic void ctx_unmap_clear_pte(struct dmar_ctx *ctx, dmar_gaddr_t base, 551257251Skib int lvl, int flags, dmar_pte_t *pte, struct sf_buf **sf, bool free_fs); 552257251Skib 553257251Skibstatic void 554257251Skibctx_free_pgtbl_pde(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl, int flags) 555257251Skib{ 556257251Skib struct sf_buf *sf; 557257251Skib dmar_pte_t *pde; 558257251Skib vm_pindex_t idx; 559257251Skib 560257251Skib sf = NULL; 561257251Skib pde = ctx_pgtbl_map_pte(ctx, base, lvl, flags, &idx, &sf); 562257251Skib ctx_unmap_clear_pte(ctx, base, lvl, flags, pde, &sf, true); 563257251Skib} 564257251Skib 565257251Skibstatic void 566257251Skibctx_unmap_clear_pte(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl, 567257251Skib int flags, dmar_pte_t *pte, struct sf_buf **sf, bool free_sf) 568257251Skib{ 569257251Skib vm_page_t m; 570257251Skib 571257251Skib dmar_pte_clear(&pte->pte); 572277315Skib dmar_flush_pte_to_ram(ctx->dmar, pte); 573257251Skib m = sf_buf_page(*sf); 574257251Skib if (free_sf) { 575277315Skib dmar_unmap_pgtbl(*sf); 576257251Skib *sf = NULL; 577257251Skib } 578257251Skib m->wire_count--; 579257251Skib if (m->wire_count != 0) 580257251Skib return; 581257251Skib KASSERT(lvl != 0, 582257251Skib ("lost reference (lvl) on root pg ctx %p base %jx lvl %d", 583257251Skib ctx, (uintmax_t)base, lvl)); 584257251Skib KASSERT(m->pindex != 0, 585257251Skib ("lost reference (idx) on root pg ctx %p base %jx lvl %d", 586257251Skib ctx, (uintmax_t)base, lvl)); 587257251Skib dmar_pgfree(ctx->pgtbl_obj, m->pindex, flags); 588257251Skib ctx_free_pgtbl_pde(ctx, base, lvl - 1, flags); 589257251Skib} 590257251Skib 591257251Skib/* 592257251Skib * Assumes that the unmap is never partial. 593257251Skib */ 594257251Skibstatic int 595257251Skibctx_unmap_buf_locked(struct dmar_ctx *ctx, dmar_gaddr_t base, 596257251Skib dmar_gaddr_t size, int flags) 597257251Skib{ 598257251Skib dmar_pte_t *pte; 599257251Skib struct sf_buf *sf; 600257251Skib vm_pindex_t idx; 601284021Skib dmar_gaddr_t pg_sz; 602257251Skib int lvl; 603257251Skib 604257251Skib DMAR_CTX_ASSERT_PGLOCKED(ctx); 605257251Skib if (size == 0) 606257251Skib return (0); 607257251Skib 608257251Skib KASSERT((ctx->flags & DMAR_CTX_IDMAP) == 0, 609257251Skib ("modifying idmap pagetable ctx %p", ctx)); 610257251Skib KASSERT((base & DMAR_PAGE_MASK) == 0, 611257251Skib ("non-aligned base %p %jx %jx", ctx, (uintmax_t)base, 612257251Skib (uintmax_t)size)); 613257251Skib KASSERT((size & DMAR_PAGE_MASK) == 0, 614257251Skib ("non-aligned size %p %jx %jx", ctx, (uintmax_t)base, 615257251Skib (uintmax_t)size)); 616257251Skib KASSERT(base < (1ULL << ctx->agaw), 617257251Skib ("base too high %p %jx %jx agaw %d", ctx, (uintmax_t)base, 618257251Skib (uintmax_t)size, ctx->agaw)); 619257251Skib KASSERT(base + size < (1ULL << ctx->agaw), 620257251Skib ("end too high %p %jx %jx agaw %d", ctx, (uintmax_t)base, 621257251Skib (uintmax_t)size, ctx->agaw)); 622257251Skib KASSERT(base + size > base, 623257251Skib ("size overflow %p %jx %jx", ctx, (uintmax_t)base, 624257251Skib (uintmax_t)size)); 625257251Skib KASSERT((flags & ~DMAR_PGF_WAITOK) == 0, ("invalid flags %x", flags)); 626257251Skib 627259512Skib pg_sz = 0; /* silence gcc */ 628257251Skib flags |= DMAR_PGF_OBJL; 629257251Skib TD_PREP_PINNED_ASSERT; 630257251Skib 631257251Skib for (sf = NULL; size > 0; base += pg_sz, size -= pg_sz) { 632257251Skib for (lvl = 0; lvl < ctx->pglvl; lvl++) { 633257251Skib if (lvl != ctx->pglvl - 1 && !ctx_is_sp_lvl(ctx, lvl)) 634257251Skib continue; 635257251Skib pg_sz = ctx_page_size(ctx, lvl); 636257251Skib if (pg_sz > size) 637257251Skib continue; 638257251Skib pte = ctx_pgtbl_map_pte(ctx, base, lvl, flags, 639257251Skib &idx, &sf); 640257251Skib KASSERT(pte != NULL, 641257251Skib ("sleeping or page missed %p %jx %d 0x%x", 642257251Skib ctx, (uintmax_t)base, lvl, flags)); 643257251Skib if ((pte->pte & DMAR_PTE_SP) != 0 || 644257251Skib lvl == ctx->pglvl - 1) { 645257251Skib ctx_unmap_clear_pte(ctx, base, lvl, flags, 646257251Skib pte, &sf, false); 647257251Skib break; 648257251Skib } 649257251Skib } 650257251Skib KASSERT(size >= pg_sz, 651257251Skib ("unmapping loop overflow %p %jx %jx %jx", ctx, 652257251Skib (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz)); 653257251Skib } 654257251Skib if (sf != NULL) 655277315Skib dmar_unmap_pgtbl(sf); 656257251Skib /* 657257251Skib * See 11.1 Write Buffer Flushing for an explanation why RWBF 658257251Skib * can be ignored there. 659257251Skib */ 660257251Skib 661257251Skib TD_PINNED_ASSERT; 662257251Skib return (0); 663257251Skib} 664257251Skib 665257251Skibint 666257251Skibctx_unmap_buf(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size, 667257251Skib int flags) 668257251Skib{ 669259512Skib int error; 670257251Skib 671257251Skib DMAR_CTX_PGLOCK(ctx); 672259512Skib error = ctx_unmap_buf_locked(ctx, base, size, flags); 673259512Skib DMAR_CTX_PGUNLOCK(ctx); 674259512Skib return (error); 675257251Skib} 676257251Skib 677257251Skibint 678257251Skibctx_alloc_pgtbl(struct dmar_ctx *ctx) 679257251Skib{ 680257251Skib vm_page_t m; 681257251Skib 682257251Skib KASSERT(ctx->pgtbl_obj == NULL, ("already initialized %p", ctx)); 683257251Skib 684257251Skib ctx->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL, 685257251Skib IDX_TO_OFF(pglvl_max_pages(ctx->pglvl)), 0, 0, NULL); 686257251Skib DMAR_CTX_PGLOCK(ctx); 687257251Skib m = dmar_pgalloc(ctx->pgtbl_obj, 0, DMAR_PGF_WAITOK | 688257251Skib DMAR_PGF_ZERO | DMAR_PGF_OBJL); 689257251Skib /* No implicit free of the top level page table page. */ 690257251Skib m->wire_count = 1; 691257251Skib DMAR_CTX_PGUNLOCK(ctx); 692257251Skib return (0); 693257251Skib} 694257251Skib 695257251Skibvoid 696257251Skibctx_free_pgtbl(struct dmar_ctx *ctx) 697257251Skib{ 698257251Skib vm_object_t obj; 699257251Skib vm_page_t m; 700257251Skib 701257251Skib obj = ctx->pgtbl_obj; 702257251Skib if (obj == NULL) { 703257251Skib KASSERT((ctx->dmar->hw_ecap & DMAR_ECAP_PT) != 0 && 704257251Skib (ctx->flags & DMAR_CTX_IDMAP) != 0, 705257251Skib ("lost pagetable object ctx %p", ctx)); 706257251Skib return; 707257251Skib } 708257251Skib DMAR_CTX_ASSERT_PGLOCKED(ctx); 709257251Skib ctx->pgtbl_obj = NULL; 710257251Skib 711257251Skib if ((ctx->flags & DMAR_CTX_IDMAP) != 0) { 712257251Skib put_idmap_pgtbl(obj); 713257251Skib ctx->flags &= ~DMAR_CTX_IDMAP; 714257251Skib return; 715257251Skib } 716257251Skib 717257251Skib /* Obliterate wire_counts */ 718257251Skib VM_OBJECT_ASSERT_WLOCKED(obj); 719257251Skib for (m = vm_page_lookup(obj, 0); m != NULL; m = vm_page_next(m)) 720257251Skib m->wire_count = 0; 721257251Skib VM_OBJECT_WUNLOCK(obj); 722257251Skib vm_object_deallocate(obj); 723257251Skib} 724257251Skib 725257251Skibstatic inline uint64_t 726257251Skibctx_wait_iotlb_flush(struct dmar_unit *unit, uint64_t wt, int iro) 727257251Skib{ 728257251Skib uint64_t iotlbr; 729257251Skib 730257251Skib dmar_write8(unit, iro + DMAR_IOTLB_REG_OFF, DMAR_IOTLB_IVT | 731257251Skib DMAR_IOTLB_DR | DMAR_IOTLB_DW | wt); 732257251Skib for (;;) { 733257251Skib iotlbr = dmar_read8(unit, iro + DMAR_IOTLB_REG_OFF); 734257251Skib if ((iotlbr & DMAR_IOTLB_IVT) == 0) 735257251Skib break; 736257251Skib cpu_spinwait(); 737257251Skib } 738257251Skib return (iotlbr); 739257251Skib} 740257251Skib 741259512Skibvoid 742259512Skibctx_flush_iotlb_sync(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size) 743257251Skib{ 744257251Skib struct dmar_unit *unit; 745257251Skib dmar_gaddr_t isize; 746257251Skib uint64_t iotlbr; 747257251Skib int am, iro; 748257251Skib 749257251Skib unit = ctx->dmar; 750259512Skib KASSERT(!unit->qi_enabled, ("dmar%d: sync iotlb flush call", 751259512Skib unit->unit)); 752257251Skib iro = DMAR_ECAP_IRO(unit->hw_ecap) * 16; 753257251Skib DMAR_LOCK(unit); 754257251Skib if ((unit->hw_cap & DMAR_CAP_PSI) == 0 || size > 2 * 1024 * 1024) { 755257251Skib iotlbr = ctx_wait_iotlb_flush(unit, DMAR_IOTLB_IIRG_DOM | 756257251Skib DMAR_IOTLB_DID(ctx->domain), iro); 757257251Skib KASSERT((iotlbr & DMAR_IOTLB_IAIG_MASK) != 758257251Skib DMAR_IOTLB_IAIG_INVLD, 759257251Skib ("dmar%d: invalidation failed %jx", unit->unit, 760257251Skib (uintmax_t)iotlbr)); 761257251Skib } else { 762257251Skib for (; size > 0; base += isize, size -= isize) { 763259512Skib am = calc_am(unit, base, size, &isize); 764257251Skib dmar_write8(unit, iro, base | am); 765257251Skib iotlbr = ctx_wait_iotlb_flush(unit, 766257251Skib DMAR_IOTLB_IIRG_PAGE | DMAR_IOTLB_DID(ctx->domain), 767257251Skib iro); 768257251Skib KASSERT((iotlbr & DMAR_IOTLB_IAIG_MASK) != 769257251Skib DMAR_IOTLB_IAIG_INVLD, 770257251Skib ("dmar%d: PSI invalidation failed " 771257251Skib "iotlbr 0x%jx base 0x%jx size 0x%jx am %d", 772257251Skib unit->unit, (uintmax_t)iotlbr, 773257251Skib (uintmax_t)base, (uintmax_t)size, am)); 774257251Skib /* 775257251Skib * Any non-page granularity covers whole guest 776257251Skib * address space for the domain. 777257251Skib */ 778257251Skib if ((iotlbr & DMAR_IOTLB_IAIG_MASK) != 779257251Skib DMAR_IOTLB_IAIG_PAGE) 780257251Skib break; 781257251Skib } 782257251Skib } 783257251Skib DMAR_UNLOCK(unit); 784257251Skib} 785