/* intel_idpgtbl.c revision 257900 */
1257251Skib/*- 2257251Skib * Copyright (c) 2013 The FreeBSD Foundation 3257251Skib * All rights reserved. 4257251Skib * 5257251Skib * This software was developed by Konstantin Belousov <kib@FreeBSD.org> 6257251Skib * under sponsorship from the FreeBSD Foundation. 7257251Skib * 8257251Skib * Redistribution and use in source and binary forms, with or without 9257251Skib * modification, are permitted provided that the following conditions 10257251Skib * are met: 11257251Skib * 1. Redistributions of source code must retain the above copyright 12257251Skib * notice, this list of conditions and the following disclaimer. 13257251Skib * 2. Redistributions in binary form must reproduce the above copyright 14257251Skib * notice, this list of conditions and the following disclaimer in the 15257251Skib * documentation and/or other materials provided with the distribution. 16257251Skib * 17257251Skib * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18257251Skib * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19257251Skib * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20257251Skib * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21257251Skib * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22257251Skib * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23257251Skib * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24257251Skib * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25257251Skib * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26257251Skib * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27257251Skib * SUCH DAMAGE. 
28257251Skib */ 29257251Skib 30257251Skib#include <sys/cdefs.h> 31257251Skib__FBSDID("$FreeBSD: head/sys/x86/iommu/intel_idpgtbl.c 257900 2013-11-09 20:36:52Z dim $"); 32257251Skib 33257251Skib#include <sys/param.h> 34257251Skib#include <sys/systm.h> 35257251Skib#include <sys/malloc.h> 36257251Skib#include <sys/bus.h> 37257251Skib#include <sys/interrupt.h> 38257251Skib#include <sys/kernel.h> 39257251Skib#include <sys/ktr.h> 40257251Skib#include <sys/lock.h> 41257251Skib#include <sys/memdesc.h> 42257251Skib#include <sys/mutex.h> 43257251Skib#include <sys/proc.h> 44257251Skib#include <sys/rwlock.h> 45257251Skib#include <sys/rman.h> 46257251Skib#include <sys/sf_buf.h> 47257251Skib#include <sys/sysctl.h> 48257251Skib#include <sys/taskqueue.h> 49257251Skib#include <sys/tree.h> 50257251Skib#include <sys/uio.h> 51257251Skib#include <vm/vm.h> 52257251Skib#include <vm/vm_extern.h> 53257251Skib#include <vm/vm_kern.h> 54257251Skib#include <vm/vm_object.h> 55257251Skib#include <vm/vm_page.h> 56257251Skib#include <vm/vm_pager.h> 57257251Skib#include <vm/vm_map.h> 58257251Skib#include <machine/atomic.h> 59257251Skib#include <machine/bus.h> 60257251Skib#include <machine/cpu.h> 61257251Skib#include <machine/md_var.h> 62257251Skib#include <machine/specialreg.h> 63257251Skib#include <x86/include/busdma_impl.h> 64257251Skib#include <x86/iommu/intel_reg.h> 65257251Skib#include <x86/iommu/busdma_dmar.h> 66257251Skib#include <x86/iommu/intel_dmar.h> 67257251Skib 68257251Skibstatic int ctx_unmap_buf_locked(struct dmar_ctx *ctx, dmar_gaddr_t base, 69257251Skib dmar_gaddr_t size, int flags); 70257251Skib 71257251Skib/* 72257251Skib * The cache of the identity mapping page tables for the DMARs. Using 73257251Skib * the cache saves significant amount of memory for page tables by 74257251Skib * reusing the page tables, since usually DMARs are identical and have 75257251Skib * the same capabilities. 
Still, cache records the information needed 76257251Skib * to match DMAR capabilities and page table format, to correctly 77257251Skib * handle different DMARs. 78257251Skib */ 79257251Skib 80257251Skibstruct idpgtbl { 81257251Skib dmar_gaddr_t maxaddr; /* Page table covers the guest address 82257251Skib range [0..maxaddr) */ 83257251Skib int pglvl; /* Total page table levels ignoring 84257251Skib superpages */ 85257251Skib int leaf; /* The last materialized page table 86257251Skib level, it is non-zero if superpages 87257251Skib are supported */ 88257251Skib vm_object_t pgtbl_obj; /* The page table pages */ 89257251Skib LIST_ENTRY(idpgtbl) link; 90257251Skib}; 91257251Skib 92257251Skibstatic struct sx idpgtbl_lock; 93257251SkibSX_SYSINIT(idpgtbl, &idpgtbl_lock, "idpgtbl"); 94257251Skibstatic LIST_HEAD(, idpgtbl) idpgtbls = LIST_HEAD_INITIALIZER(idpgtbls); 95257251Skibstatic MALLOC_DEFINE(M_DMAR_IDPGTBL, "dmar_idpgtbl", 96257251Skib "Intel DMAR Identity mappings cache elements"); 97257251Skib 98257251Skib/* 99257251Skib * Build the next level of the page tables for the identity mapping. 100257251Skib * - lvl is the level to build; 101257251Skib * - idx is the index of the page table page in the pgtbl_obj, which is 102257251Skib * being allocated filled now; 103257251Skib * - addr is the starting address in the bus address space which is 104257251Skib * mapped by the page table page. 
105257251Skib */ 106257251Skibstatic void 107257251Skibctx_idmap_nextlvl(struct idpgtbl *tbl, int lvl, vm_pindex_t idx, 108257251Skib dmar_gaddr_t addr) 109257251Skib{ 110257251Skib vm_page_t m, m1; 111257251Skib dmar_pte_t *pte; 112257251Skib struct sf_buf *sf; 113257251Skib dmar_gaddr_t f, pg_sz; 114257251Skib vm_pindex_t base; 115257251Skib int i; 116257251Skib 117257251Skib VM_OBJECT_ASSERT_LOCKED(tbl->pgtbl_obj); 118257251Skib if (addr >= tbl->maxaddr) 119257251Skib return; 120257251Skib m = dmar_pgalloc(tbl->pgtbl_obj, idx, DMAR_PGF_OBJL | DMAR_PGF_WAITOK | 121257251Skib DMAR_PGF_ZERO); 122257251Skib base = idx * DMAR_NPTEPG + 1; /* Index of the first child page of idx */ 123257251Skib pg_sz = pglvl_page_size(tbl->pglvl, lvl); 124257251Skib if (lvl != tbl->leaf) { 125257251Skib for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) 126257251Skib ctx_idmap_nextlvl(tbl, lvl + 1, base + i, f); 127257251Skib } 128257251Skib VM_OBJECT_WUNLOCK(tbl->pgtbl_obj); 129257251Skib pte = dmar_map_pgtbl(tbl->pgtbl_obj, idx, DMAR_PGF_WAITOK, &sf); 130257251Skib if (lvl == tbl->leaf) { 131257251Skib for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) { 132257251Skib if (f >= tbl->maxaddr) 133257251Skib break; 134257251Skib pte[i].pte = (DMAR_PTE_ADDR_MASK & f) | 135257251Skib DMAR_PTE_R | DMAR_PTE_W; 136257251Skib } 137257251Skib } else { 138257251Skib for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) { 139257251Skib if (f >= tbl->maxaddr) 140257251Skib break; 141257251Skib m1 = dmar_pgalloc(tbl->pgtbl_obj, base + i, 142257251Skib DMAR_PGF_NOALLOC); 143257251Skib KASSERT(m1 != NULL, ("lost page table page")); 144257251Skib pte[i].pte = (DMAR_PTE_ADDR_MASK & 145257251Skib VM_PAGE_TO_PHYS(m1)) | DMAR_PTE_R | DMAR_PTE_W; 146257251Skib } 147257251Skib } 148257251Skib /* ctx_get_idmap_pgtbl flushes CPU cache if needed. 
*/ 149257251Skib dmar_unmap_pgtbl(sf, true); 150257251Skib VM_OBJECT_WLOCK(tbl->pgtbl_obj); 151257251Skib} 152257251Skib 153257251Skib/* 154257251Skib * Find a ready and compatible identity-mapping page table in the 155257251Skib * cache. If not found, populate the identity-mapping page table for 156257251Skib * the context, up to the maxaddr. The maxaddr byte is allowed to be 157257251Skib * not mapped, which is aligned with the definition of Maxmem as the 158257251Skib * highest usable physical address + 1. If superpages are used, the 159257251Skib * maxaddr is typically mapped. 160257251Skib */ 161257251Skibvm_object_t 162257251Skibctx_get_idmap_pgtbl(struct dmar_ctx *ctx, dmar_gaddr_t maxaddr) 163257251Skib{ 164257251Skib struct dmar_unit *unit; 165257251Skib struct idpgtbl *tbl; 166257251Skib vm_object_t res; 167257251Skib vm_page_t m; 168257251Skib int leaf, i; 169257251Skib 170257900Sdim leaf = 0; /* silence gcc */ 171257900Sdim 172257251Skib /* 173257251Skib * First, determine where to stop the paging structures. 174257251Skib */ 175257251Skib for (i = 0; i < ctx->pglvl; i++) { 176257251Skib if (i == ctx->pglvl - 1 || ctx_is_sp_lvl(ctx, i)) { 177257251Skib leaf = i; 178257251Skib break; 179257251Skib } 180257251Skib } 181257251Skib 182257251Skib /* 183257251Skib * Search the cache for a compatible page table. Qualified 184257251Skib * page table must map up to maxaddr, its level must be 185257251Skib * supported by the DMAR and leaf should be equal to the 186257251Skib * calculated value. The later restriction could be lifted 187257251Skib * but I believe it is currently impossible to have any 188257251Skib * deviations for existing hardware. 
189257251Skib */ 190257251Skib sx_slock(&idpgtbl_lock); 191257251Skib LIST_FOREACH(tbl, &idpgtbls, link) { 192257251Skib if (tbl->maxaddr >= maxaddr && 193257251Skib dmar_pglvl_supported(ctx->dmar, tbl->pglvl) && 194257251Skib tbl->leaf == leaf) { 195257251Skib res = tbl->pgtbl_obj; 196257251Skib vm_object_reference(res); 197257251Skib sx_sunlock(&idpgtbl_lock); 198257251Skib ctx->pglvl = tbl->pglvl; /* XXXKIB ? */ 199257251Skib goto end; 200257251Skib } 201257251Skib } 202257251Skib 203257251Skib /* 204257251Skib * Not found in cache, relock the cache into exclusive mode to 205257251Skib * be able to add element, and recheck cache again after the 206257251Skib * relock. 207257251Skib */ 208257251Skib sx_sunlock(&idpgtbl_lock); 209257251Skib sx_xlock(&idpgtbl_lock); 210257251Skib LIST_FOREACH(tbl, &idpgtbls, link) { 211257251Skib if (tbl->maxaddr >= maxaddr && 212257251Skib dmar_pglvl_supported(ctx->dmar, tbl->pglvl) && 213257251Skib tbl->leaf == leaf) { 214257251Skib res = tbl->pgtbl_obj; 215257251Skib vm_object_reference(res); 216257251Skib sx_xunlock(&idpgtbl_lock); 217257251Skib ctx->pglvl = tbl->pglvl; /* XXXKIB ? */ 218257251Skib return (res); 219257251Skib } 220257251Skib } 221257251Skib 222257251Skib /* 223257251Skib * Still not found, create new page table. 
224257251Skib */ 225257251Skib tbl = malloc(sizeof(*tbl), M_DMAR_IDPGTBL, M_WAITOK); 226257251Skib tbl->pglvl = ctx->pglvl; 227257251Skib tbl->leaf = leaf; 228257251Skib tbl->maxaddr = maxaddr; 229257251Skib tbl->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL, 230257251Skib IDX_TO_OFF(pglvl_max_pages(tbl->pglvl)), 0, 0, NULL); 231257251Skib VM_OBJECT_WLOCK(tbl->pgtbl_obj); 232257251Skib ctx_idmap_nextlvl(tbl, 0, 0, 0); 233257251Skib VM_OBJECT_WUNLOCK(tbl->pgtbl_obj); 234257251Skib LIST_INSERT_HEAD(&idpgtbls, tbl, link); 235257251Skib res = tbl->pgtbl_obj; 236257251Skib vm_object_reference(res); 237257251Skib sx_xunlock(&idpgtbl_lock); 238257251Skib 239257251Skibend: 240257251Skib /* 241257251Skib * Table was found or created. 242257251Skib * 243257251Skib * If DMAR does not snoop paging structures accesses, flush 244257251Skib * CPU cache to memory. Note that dmar_unmap_pgtbl() coherent 245257251Skib * argument was possibly invalid at the time of the identity 246257251Skib * page table creation, since DMAR which was passed at the 247257251Skib * time of creation could be coherent, while current DMAR is 248257251Skib * not. 249257251Skib * 250257251Skib * If DMAR cannot look into the chipset write buffer, flush it 251257251Skib * as well. 252257251Skib */ 253257251Skib unit = ctx->dmar; 254257251Skib if (!DMAR_IS_COHERENT(unit)) { 255257251Skib VM_OBJECT_WLOCK(res); 256257251Skib for (m = vm_page_lookup(res, 0); m != NULL; 257257251Skib m = vm_page_next(m)) 258257251Skib pmap_invalidate_cache_pages(&m, 1); 259257251Skib VM_OBJECT_WUNLOCK(res); 260257251Skib } 261257251Skib if ((unit->hw_cap & DMAR_CAP_RWBF) != 0) { 262257251Skib DMAR_LOCK(unit); 263257251Skib dmar_flush_write_bufs(unit); 264257251Skib DMAR_UNLOCK(unit); 265257251Skib } 266257251Skib 267257251Skib return (res); 268257251Skib} 269257251Skib 270257251Skib/* 271257251Skib * Return a reference to the identity mapping page table to the cache. 
272257251Skib */ 273257251Skibvoid 274257251Skibput_idmap_pgtbl(vm_object_t obj) 275257251Skib{ 276257251Skib struct idpgtbl *tbl, *tbl1; 277257251Skib vm_object_t rmobj; 278257251Skib 279257251Skib sx_slock(&idpgtbl_lock); 280257251Skib KASSERT(obj->ref_count >= 2, ("lost cache reference")); 281257251Skib vm_object_deallocate(obj); 282257251Skib 283257251Skib /* 284257251Skib * Cache always owns one last reference on the page table object. 285257251Skib * If there is an additional reference, object must stay. 286257251Skib */ 287257251Skib if (obj->ref_count > 1) { 288257251Skib sx_sunlock(&idpgtbl_lock); 289257251Skib return; 290257251Skib } 291257251Skib 292257251Skib /* 293257251Skib * Cache reference is the last, remove cache element and free 294257251Skib * page table object, returning the page table pages to the 295257251Skib * system. 296257251Skib */ 297257251Skib sx_sunlock(&idpgtbl_lock); 298257251Skib sx_xlock(&idpgtbl_lock); 299257251Skib LIST_FOREACH_SAFE(tbl, &idpgtbls, link, tbl1) { 300257251Skib rmobj = tbl->pgtbl_obj; 301257251Skib if (rmobj->ref_count == 1) { 302257251Skib LIST_REMOVE(tbl, link); 303257251Skib atomic_subtract_int(&dmar_tbl_pagecnt, 304257251Skib rmobj->resident_page_count); 305257251Skib vm_object_deallocate(rmobj); 306257251Skib free(tbl, M_DMAR_IDPGTBL); 307257251Skib } 308257251Skib } 309257251Skib sx_xunlock(&idpgtbl_lock); 310257251Skib} 311257251Skib 312257251Skib/* 313257251Skib * The core routines to map and unmap host pages at the given guest 314257251Skib * address. Support superpages. 315257251Skib */ 316257251Skib 317257251Skib/* 318257251Skib * Index of the pte for the guest address base in the page table at 319257251Skib * the level lvl. 
320257251Skib */ 321257251Skibstatic int 322257251Skibctx_pgtbl_pte_off(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl) 323257251Skib{ 324257251Skib 325257251Skib base >>= DMAR_PAGE_SHIFT + (ctx->pglvl - lvl - 1) * DMAR_NPTEPGSHIFT; 326257251Skib return (base & DMAR_PTEMASK); 327257251Skib} 328257251Skib 329257251Skib/* 330257251Skib * Returns the page index of the page table page in the page table 331257251Skib * object, which maps the given address base at the page table level 332257251Skib * lvl. 333257251Skib */ 334257251Skibstatic vm_pindex_t 335257251Skibctx_pgtbl_get_pindex(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl) 336257251Skib{ 337257251Skib vm_pindex_t idx, pidx; 338257251Skib int i; 339257251Skib 340257251Skib KASSERT(lvl >= 0 && lvl < ctx->pglvl, ("wrong lvl %p %d", ctx, lvl)); 341257251Skib 342257251Skib for (pidx = idx = 0, i = 0; i < lvl; i++, pidx = idx) 343257251Skib idx = ctx_pgtbl_pte_off(ctx, base, i) + pidx * DMAR_NPTEPG + 1; 344257251Skib return (idx); 345257251Skib} 346257251Skib 347257251Skibstatic dmar_pte_t * 348257251Skibctx_pgtbl_map_pte(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl, int flags, 349257251Skib vm_pindex_t *idxp, struct sf_buf **sf) 350257251Skib{ 351257251Skib vm_page_t m; 352257251Skib struct sf_buf *sfp; 353257251Skib dmar_pte_t *pte, *ptep; 354257251Skib vm_pindex_t idx, idx1; 355257251Skib 356257251Skib DMAR_CTX_ASSERT_PGLOCKED(ctx); 357257251Skib KASSERT((flags & DMAR_PGF_OBJL) != 0, ("lost PGF_OBJL")); 358257251Skib 359257251Skib idx = ctx_pgtbl_get_pindex(ctx, base, lvl); 360257251Skib if (*sf != NULL && idx == *idxp) { 361257251Skib pte = (dmar_pte_t *)sf_buf_kva(*sf); 362257251Skib } else { 363257251Skib if (*sf != NULL) 364257251Skib dmar_unmap_pgtbl(*sf, DMAR_IS_COHERENT(ctx->dmar)); 365257251Skib *idxp = idx; 366257251Skibretry: 367257251Skib pte = dmar_map_pgtbl(ctx->pgtbl_obj, idx, flags, sf); 368257251Skib if (pte == NULL) { 369257251Skib KASSERT(lvl > 0, ("lost root page table page %p", ctx)); 
370257251Skib /* 371257251Skib * Page table page does not exists, allocate 372257251Skib * it and create pte in the up level. 373257251Skib */ 374257251Skib m = dmar_pgalloc(ctx->pgtbl_obj, idx, flags | 375257251Skib DMAR_PGF_ZERO); 376257251Skib if (m == NULL) 377257251Skib return (NULL); 378257251Skib 379257251Skib /* 380257251Skib * Prevent potential free while pgtbl_obj is 381257251Skib * unlocked in the recursive call to 382257251Skib * ctx_pgtbl_map_pte(), if other thread did 383257251Skib * pte write and clean while the lock if 384257251Skib * dropped. 385257251Skib */ 386257251Skib m->wire_count++; 387257251Skib 388257251Skib sfp = NULL; 389257251Skib ptep = ctx_pgtbl_map_pte(ctx, base, lvl - 1, flags, 390257251Skib &idx1, &sfp); 391257251Skib if (ptep == NULL) { 392257251Skib KASSERT(m->pindex != 0, 393257251Skib ("loosing root page %p", ctx)); 394257251Skib m->wire_count--; 395257251Skib dmar_pgfree(ctx->pgtbl_obj, m->pindex, flags); 396257251Skib return (NULL); 397257251Skib } 398257251Skib dmar_pte_store(&ptep->pte, DMAR_PTE_R | DMAR_PTE_W | 399257251Skib VM_PAGE_TO_PHYS(m)); 400257251Skib sf_buf_page(sfp)->wire_count += 1; 401257251Skib m->wire_count--; 402257251Skib dmar_unmap_pgtbl(sfp, DMAR_IS_COHERENT(ctx->dmar)); 403257251Skib /* Only executed once. 
*/ 404257251Skib goto retry; 405257251Skib } 406257251Skib } 407257251Skib pte += ctx_pgtbl_pte_off(ctx, base, lvl); 408257251Skib return (pte); 409257251Skib} 410257251Skib 411257251Skibstatic int 412257251Skibctx_map_buf_locked(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size, 413257251Skib vm_page_t *ma, uint64_t pflags, int flags) 414257251Skib{ 415257251Skib dmar_pte_t *pte; 416257251Skib struct sf_buf *sf; 417257251Skib dmar_gaddr_t pg_sz, base1, size1; 418257251Skib vm_pindex_t pi, c, idx, run_sz; 419257251Skib int lvl; 420257251Skib bool superpage; 421257251Skib 422257251Skib DMAR_CTX_ASSERT_PGLOCKED(ctx); 423257251Skib 424257251Skib base1 = base; 425257251Skib size1 = size; 426257251Skib flags |= DMAR_PGF_OBJL; 427257251Skib TD_PREP_PINNED_ASSERT; 428257251Skib 429257251Skib for (sf = NULL, pi = 0; size > 0; base += pg_sz, size -= pg_sz, 430257251Skib pi += run_sz) { 431257251Skib for (lvl = 0, c = 0, superpage = false;; lvl++) { 432257251Skib pg_sz = ctx_page_size(ctx, lvl); 433257251Skib run_sz = pg_sz >> DMAR_PAGE_SHIFT; 434257251Skib if (lvl == ctx->pglvl - 1) 435257251Skib break; 436257251Skib /* 437257251Skib * Check if the current base suitable for the 438257251Skib * superpage mapping. First, verify the level. 439257251Skib */ 440257251Skib if (!ctx_is_sp_lvl(ctx, lvl)) 441257251Skib continue; 442257251Skib /* 443257251Skib * Next, look at the size of the mapping and 444257251Skib * alignment of both guest and host addresses. 445257251Skib */ 446257251Skib if (size < pg_sz || (base & (pg_sz - 1)) != 0 || 447257251Skib (VM_PAGE_TO_PHYS(ma[pi]) & (pg_sz - 1)) != 0) 448257251Skib continue; 449257251Skib /* All passed, check host pages contiguouty. 
*/ 450257251Skib if (c == 0) { 451257251Skib for (c = 1; c < run_sz; c++) { 452257251Skib if (VM_PAGE_TO_PHYS(ma[pi + c]) != 453257251Skib VM_PAGE_TO_PHYS(ma[pi + c - 1]) + 454257251Skib PAGE_SIZE) 455257251Skib break; 456257251Skib } 457257251Skib } 458257251Skib if (c >= run_sz) { 459257251Skib superpage = true; 460257251Skib break; 461257251Skib } 462257251Skib } 463257251Skib KASSERT(size >= pg_sz, 464257251Skib ("mapping loop overflow %p %jx %jx %jx", ctx, 465257251Skib (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz)); 466257251Skib pte = ctx_pgtbl_map_pte(ctx, base, lvl, flags, &idx, &sf); 467257251Skib if (pte == NULL) { 468257251Skib KASSERT((flags & DMAR_PGF_WAITOK) == 0, 469257251Skib ("failed waitable pte alloc %p", ctx)); 470257251Skib if (sf != NULL) { 471257251Skib dmar_unmap_pgtbl(sf, 472257251Skib DMAR_IS_COHERENT(ctx->dmar)); 473257251Skib } 474257251Skib ctx_unmap_buf_locked(ctx, base1, base - base1, flags); 475257251Skib TD_PINNED_ASSERT; 476257251Skib return (ENOMEM); 477257251Skib } 478257251Skib dmar_pte_store(&pte->pte, VM_PAGE_TO_PHYS(ma[pi]) | pflags | 479257251Skib (superpage ? 
DMAR_PTE_SP : 0)); 480257251Skib sf_buf_page(sf)->wire_count += 1; 481257251Skib } 482257251Skib if (sf != NULL) 483257251Skib dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(ctx->dmar)); 484257251Skib TD_PINNED_ASSERT; 485257251Skib return (0); 486257251Skib} 487257251Skib 488257251Skibint 489257251Skibctx_map_buf(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size, 490257251Skib vm_page_t *ma, uint64_t pflags, int flags) 491257251Skib{ 492257512Skib struct dmar_unit *unit; 493257512Skib int error; 494257251Skib 495257512Skib unit = ctx->dmar; 496257512Skib 497257251Skib KASSERT((ctx->flags & DMAR_CTX_IDMAP) == 0, 498257251Skib ("modifying idmap pagetable ctx %p", ctx)); 499257251Skib KASSERT((base & DMAR_PAGE_MASK) == 0, 500257251Skib ("non-aligned base %p %jx %jx", ctx, (uintmax_t)base, 501257251Skib (uintmax_t)size)); 502257251Skib KASSERT((size & DMAR_PAGE_MASK) == 0, 503257251Skib ("non-aligned size %p %jx %jx", ctx, (uintmax_t)base, 504257251Skib (uintmax_t)size)); 505257251Skib KASSERT(size > 0, ("zero size %p %jx %jx", ctx, (uintmax_t)base, 506257251Skib (uintmax_t)size)); 507257251Skib KASSERT(base < (1ULL << ctx->agaw), 508257251Skib ("base too high %p %jx %jx agaw %d", ctx, (uintmax_t)base, 509257251Skib (uintmax_t)size, ctx->agaw)); 510257251Skib KASSERT(base + size < (1ULL << ctx->agaw), 511257251Skib ("end too high %p %jx %jx agaw %d", ctx, (uintmax_t)base, 512257251Skib (uintmax_t)size, ctx->agaw)); 513257251Skib KASSERT(base + size > base, 514257251Skib ("size overflow %p %jx %jx", ctx, (uintmax_t)base, 515257251Skib (uintmax_t)size)); 516257251Skib KASSERT((pflags & (DMAR_PTE_R | DMAR_PTE_W)) != 0, 517257251Skib ("neither read nor write %jx", (uintmax_t)pflags)); 518257251Skib KASSERT((pflags & ~(DMAR_PTE_R | DMAR_PTE_W | DMAR_PTE_SNP | 519257251Skib DMAR_PTE_TM)) == 0, 520257251Skib ("invalid pte flags %jx", (uintmax_t)pflags)); 521257251Skib KASSERT((pflags & DMAR_PTE_SNP) == 0 || 522257512Skib (unit->hw_ecap & DMAR_ECAP_SC) != 0, 523257251Skib 
("PTE_SNP for dmar without snoop control %p %jx", 524257251Skib ctx, (uintmax_t)pflags)); 525257251Skib KASSERT((pflags & DMAR_PTE_TM) == 0 || 526257512Skib (unit->hw_ecap & DMAR_ECAP_DI) != 0, 527257251Skib ("PTE_TM for dmar without DIOTLB %p %jx", 528257251Skib ctx, (uintmax_t)pflags)); 529257251Skib KASSERT((flags & ~DMAR_PGF_WAITOK) == 0, ("invalid flags %x", flags)); 530257251Skib 531257251Skib DMAR_CTX_PGLOCK(ctx); 532257512Skib error = ctx_map_buf_locked(ctx, base, size, ma, pflags, flags); 533257512Skib DMAR_CTX_PGUNLOCK(ctx); 534257512Skib if (error != 0) 535257512Skib return (error); 536257512Skib 537257512Skib if ((unit->hw_cap & DMAR_CAP_CM) != 0) 538257512Skib ctx_flush_iotlb_sync(ctx, base, size); 539257512Skib else if ((unit->hw_cap & DMAR_CAP_RWBF) != 0) { 540257512Skib /* See 11.1 Write Buffer Flushing. */ 541257512Skib DMAR_LOCK(unit); 542257512Skib dmar_flush_write_bufs(unit); 543257512Skib DMAR_UNLOCK(unit); 544257512Skib } 545257512Skib return (0); 546257251Skib} 547257251Skib 548257251Skibstatic void ctx_unmap_clear_pte(struct dmar_ctx *ctx, dmar_gaddr_t base, 549257251Skib int lvl, int flags, dmar_pte_t *pte, struct sf_buf **sf, bool free_fs); 550257251Skib 551257251Skibstatic void 552257251Skibctx_free_pgtbl_pde(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl, int flags) 553257251Skib{ 554257251Skib struct sf_buf *sf; 555257251Skib dmar_pte_t *pde; 556257251Skib vm_pindex_t idx; 557257251Skib 558257251Skib sf = NULL; 559257251Skib pde = ctx_pgtbl_map_pte(ctx, base, lvl, flags, &idx, &sf); 560257251Skib ctx_unmap_clear_pte(ctx, base, lvl, flags, pde, &sf, true); 561257251Skib} 562257251Skib 563257251Skibstatic void 564257251Skibctx_unmap_clear_pte(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl, 565257251Skib int flags, dmar_pte_t *pte, struct sf_buf **sf, bool free_sf) 566257251Skib{ 567257251Skib vm_page_t m; 568257251Skib 569257251Skib dmar_pte_clear(&pte->pte); 570257251Skib m = sf_buf_page(*sf); 571257251Skib if (free_sf) { 
572257251Skib dmar_unmap_pgtbl(*sf, DMAR_IS_COHERENT(ctx->dmar)); 573257251Skib *sf = NULL; 574257251Skib } 575257251Skib m->wire_count--; 576257251Skib if (m->wire_count != 0) 577257251Skib return; 578257251Skib KASSERT(lvl != 0, 579257251Skib ("lost reference (lvl) on root pg ctx %p base %jx lvl %d", 580257251Skib ctx, (uintmax_t)base, lvl)); 581257251Skib KASSERT(m->pindex != 0, 582257251Skib ("lost reference (idx) on root pg ctx %p base %jx lvl %d", 583257251Skib ctx, (uintmax_t)base, lvl)); 584257251Skib dmar_pgfree(ctx->pgtbl_obj, m->pindex, flags); 585257251Skib ctx_free_pgtbl_pde(ctx, base, lvl - 1, flags); 586257251Skib} 587257251Skib 588257251Skib/* 589257251Skib * Assumes that the unmap is never partial. 590257251Skib */ 591257251Skibstatic int 592257251Skibctx_unmap_buf_locked(struct dmar_ctx *ctx, dmar_gaddr_t base, 593257251Skib dmar_gaddr_t size, int flags) 594257251Skib{ 595257251Skib dmar_pte_t *pte; 596257251Skib struct sf_buf *sf; 597257251Skib vm_pindex_t idx; 598257251Skib dmar_gaddr_t pg_sz, base1, size1; 599257251Skib int lvl; 600257251Skib 601257251Skib DMAR_CTX_ASSERT_PGLOCKED(ctx); 602257251Skib if (size == 0) 603257251Skib return (0); 604257251Skib 605257251Skib KASSERT((ctx->flags & DMAR_CTX_IDMAP) == 0, 606257251Skib ("modifying idmap pagetable ctx %p", ctx)); 607257251Skib KASSERT((base & DMAR_PAGE_MASK) == 0, 608257251Skib ("non-aligned base %p %jx %jx", ctx, (uintmax_t)base, 609257251Skib (uintmax_t)size)); 610257251Skib KASSERT((size & DMAR_PAGE_MASK) == 0, 611257251Skib ("non-aligned size %p %jx %jx", ctx, (uintmax_t)base, 612257251Skib (uintmax_t)size)); 613257251Skib KASSERT(base < (1ULL << ctx->agaw), 614257251Skib ("base too high %p %jx %jx agaw %d", ctx, (uintmax_t)base, 615257251Skib (uintmax_t)size, ctx->agaw)); 616257251Skib KASSERT(base + size < (1ULL << ctx->agaw), 617257251Skib ("end too high %p %jx %jx agaw %d", ctx, (uintmax_t)base, 618257251Skib (uintmax_t)size, ctx->agaw)); 619257251Skib KASSERT(base + size > base, 
620257251Skib ("size overflow %p %jx %jx", ctx, (uintmax_t)base, 621257251Skib (uintmax_t)size)); 622257251Skib KASSERT((flags & ~DMAR_PGF_WAITOK) == 0, ("invalid flags %x", flags)); 623257251Skib 624257900Sdim pg_sz = 0; /* silence gcc */ 625257251Skib base1 = base; 626257251Skib size1 = size; 627257251Skib flags |= DMAR_PGF_OBJL; 628257251Skib TD_PREP_PINNED_ASSERT; 629257251Skib 630257251Skib for (sf = NULL; size > 0; base += pg_sz, size -= pg_sz) { 631257251Skib for (lvl = 0; lvl < ctx->pglvl; lvl++) { 632257251Skib if (lvl != ctx->pglvl - 1 && !ctx_is_sp_lvl(ctx, lvl)) 633257251Skib continue; 634257251Skib pg_sz = ctx_page_size(ctx, lvl); 635257251Skib if (pg_sz > size) 636257251Skib continue; 637257251Skib pte = ctx_pgtbl_map_pte(ctx, base, lvl, flags, 638257251Skib &idx, &sf); 639257251Skib KASSERT(pte != NULL, 640257251Skib ("sleeping or page missed %p %jx %d 0x%x", 641257251Skib ctx, (uintmax_t)base, lvl, flags)); 642257251Skib if ((pte->pte & DMAR_PTE_SP) != 0 || 643257251Skib lvl == ctx->pglvl - 1) { 644257251Skib ctx_unmap_clear_pte(ctx, base, lvl, flags, 645257251Skib pte, &sf, false); 646257251Skib break; 647257251Skib } 648257251Skib } 649257251Skib KASSERT(size >= pg_sz, 650257251Skib ("unmapping loop overflow %p %jx %jx %jx", ctx, 651257251Skib (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz)); 652257251Skib } 653257251Skib if (sf != NULL) 654257251Skib dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(ctx->dmar)); 655257251Skib /* 656257251Skib * See 11.1 Write Buffer Flushing for an explanation why RWBF 657257251Skib * can be ignored there. 
658257251Skib */ 659257251Skib 660257251Skib TD_PINNED_ASSERT; 661257251Skib return (0); 662257251Skib} 663257251Skib 664257251Skibint 665257251Skibctx_unmap_buf(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size, 666257251Skib int flags) 667257251Skib{ 668257512Skib int error; 669257251Skib 670257251Skib DMAR_CTX_PGLOCK(ctx); 671257512Skib error = ctx_unmap_buf_locked(ctx, base, size, flags); 672257512Skib DMAR_CTX_PGUNLOCK(ctx); 673257512Skib return (error); 674257251Skib} 675257251Skib 676257251Skibint 677257251Skibctx_alloc_pgtbl(struct dmar_ctx *ctx) 678257251Skib{ 679257251Skib vm_page_t m; 680257251Skib 681257251Skib KASSERT(ctx->pgtbl_obj == NULL, ("already initialized %p", ctx)); 682257251Skib 683257251Skib ctx->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL, 684257251Skib IDX_TO_OFF(pglvl_max_pages(ctx->pglvl)), 0, 0, NULL); 685257251Skib DMAR_CTX_PGLOCK(ctx); 686257251Skib m = dmar_pgalloc(ctx->pgtbl_obj, 0, DMAR_PGF_WAITOK | 687257251Skib DMAR_PGF_ZERO | DMAR_PGF_OBJL); 688257251Skib /* No implicit free of the top level page table page. 
*/ 689257251Skib m->wire_count = 1; 690257251Skib DMAR_CTX_PGUNLOCK(ctx); 691257251Skib return (0); 692257251Skib} 693257251Skib 694257251Skibvoid 695257251Skibctx_free_pgtbl(struct dmar_ctx *ctx) 696257251Skib{ 697257251Skib vm_object_t obj; 698257251Skib vm_page_t m; 699257251Skib 700257251Skib obj = ctx->pgtbl_obj; 701257251Skib if (obj == NULL) { 702257251Skib KASSERT((ctx->dmar->hw_ecap & DMAR_ECAP_PT) != 0 && 703257251Skib (ctx->flags & DMAR_CTX_IDMAP) != 0, 704257251Skib ("lost pagetable object ctx %p", ctx)); 705257251Skib return; 706257251Skib } 707257251Skib DMAR_CTX_ASSERT_PGLOCKED(ctx); 708257251Skib ctx->pgtbl_obj = NULL; 709257251Skib 710257251Skib if ((ctx->flags & DMAR_CTX_IDMAP) != 0) { 711257251Skib put_idmap_pgtbl(obj); 712257251Skib ctx->flags &= ~DMAR_CTX_IDMAP; 713257251Skib return; 714257251Skib } 715257251Skib 716257251Skib /* Obliterate wire_counts */ 717257251Skib VM_OBJECT_ASSERT_WLOCKED(obj); 718257251Skib for (m = vm_page_lookup(obj, 0); m != NULL; m = vm_page_next(m)) 719257251Skib m->wire_count = 0; 720257251Skib VM_OBJECT_WUNLOCK(obj); 721257251Skib vm_object_deallocate(obj); 722257251Skib} 723257251Skib 724257251Skibstatic inline uint64_t 725257251Skibctx_wait_iotlb_flush(struct dmar_unit *unit, uint64_t wt, int iro) 726257251Skib{ 727257251Skib uint64_t iotlbr; 728257251Skib 729257251Skib dmar_write8(unit, iro + DMAR_IOTLB_REG_OFF, DMAR_IOTLB_IVT | 730257251Skib DMAR_IOTLB_DR | DMAR_IOTLB_DW | wt); 731257251Skib for (;;) { 732257251Skib iotlbr = dmar_read8(unit, iro + DMAR_IOTLB_REG_OFF); 733257251Skib if ((iotlbr & DMAR_IOTLB_IVT) == 0) 734257251Skib break; 735257251Skib cpu_spinwait(); 736257251Skib } 737257251Skib return (iotlbr); 738257251Skib} 739257251Skib 740257512Skibvoid 741257512Skibctx_flush_iotlb_sync(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size) 742257251Skib{ 743257251Skib struct dmar_unit *unit; 744257251Skib dmar_gaddr_t isize; 745257251Skib uint64_t iotlbr; 746257251Skib int am, iro; 747257251Skib 
748257251Skib unit = ctx->dmar; 749257512Skib KASSERT(!unit->qi_enabled, ("dmar%d: sync iotlb flush call", 750257512Skib unit->unit)); 751257251Skib iro = DMAR_ECAP_IRO(unit->hw_ecap) * 16; 752257251Skib DMAR_LOCK(unit); 753257251Skib if ((unit->hw_cap & DMAR_CAP_PSI) == 0 || size > 2 * 1024 * 1024) { 754257251Skib iotlbr = ctx_wait_iotlb_flush(unit, DMAR_IOTLB_IIRG_DOM | 755257251Skib DMAR_IOTLB_DID(ctx->domain), iro); 756257251Skib KASSERT((iotlbr & DMAR_IOTLB_IAIG_MASK) != 757257251Skib DMAR_IOTLB_IAIG_INVLD, 758257251Skib ("dmar%d: invalidation failed %jx", unit->unit, 759257251Skib (uintmax_t)iotlbr)); 760257251Skib } else { 761257251Skib for (; size > 0; base += isize, size -= isize) { 762257512Skib am = calc_am(unit, base, size, &isize); 763257251Skib dmar_write8(unit, iro, base | am); 764257251Skib iotlbr = ctx_wait_iotlb_flush(unit, 765257251Skib DMAR_IOTLB_IIRG_PAGE | DMAR_IOTLB_DID(ctx->domain), 766257251Skib iro); 767257251Skib KASSERT((iotlbr & DMAR_IOTLB_IAIG_MASK) != 768257251Skib DMAR_IOTLB_IAIG_INVLD, 769257251Skib ("dmar%d: PSI invalidation failed " 770257251Skib "iotlbr 0x%jx base 0x%jx size 0x%jx am %d", 771257251Skib unit->unit, (uintmax_t)iotlbr, 772257251Skib (uintmax_t)base, (uintmax_t)size, am)); 773257251Skib /* 774257251Skib * Any non-page granularity covers whole guest 775257251Skib * address space for the domain. 776257251Skib */ 777257251Skib if ((iotlbr & DMAR_IOTLB_IAIG_MASK) != 778257251Skib DMAR_IOTLB_IAIG_PAGE) 779257251Skib break; 780257251Skib } 781257251Skib } 782257251Skib DMAR_UNLOCK(unit); 783257251Skib} 784