Deleted Added
full compact
intel_utils.c (257251) intel_utils.c (259512)
1/*-
2 * Copyright (c) 2013 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
6 * under sponsorship from the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 2013 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
6 * under sponsorship from the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/x86/iommu/intel_utils.c 257251 2013-10-28 13:33:29Z kib $");
31__FBSDID("$FreeBSD: stable/10/sys/x86/iommu/intel_utils.c 259512 2013-12-17 13:49:35Z kib $");
32
33#include <sys/param.h>
34#include <sys/bus.h>
35#include <sys/kernel.h>
36#include <sys/lock.h>
37#include <sys/malloc.h>
38#include <sys/memdesc.h>
39#include <sys/mutex.h>
40#include <sys/proc.h>
41#include <sys/queue.h>
42#include <sys/rman.h>
43#include <sys/rwlock.h>
44#include <sys/sched.h>
45#include <sys/sf_buf.h>
46#include <sys/sysctl.h>
47#include <sys/systm.h>
48#include <sys/taskqueue.h>
49#include <sys/tree.h>
50#include <vm/vm.h>
51#include <vm/vm_extern.h>
52#include <vm/vm_kern.h>
53#include <vm/vm_object.h>
54#include <vm/vm_page.h>
55#include <vm/vm_map.h>
56#include <vm/vm_pageout.h>
57#include <machine/bus.h>
58#include <machine/cpu.h>
59#include <x86/include/busdma_impl.h>
60#include <x86/iommu/intel_reg.h>
61#include <x86/iommu/busdma_dmar.h>
62#include <x86/iommu/intel_dmar.h>
63
64u_int
65dmar_nd2mask(u_int nd)
66{
67 static const u_int masks[] = {
68 0x000f, /* nd == 0 */
69 0x002f, /* nd == 1 */
70 0x00ff, /* nd == 2 */
71 0x02ff, /* nd == 3 */
72 0x0fff, /* nd == 4 */
73 0x2fff, /* nd == 5 */
74 0xffff, /* nd == 6 */
75 0x0000, /* nd == 7 reserved */
76 };
77
78 KASSERT(nd <= 6, ("number of domains %d", nd));
79 return (masks[nd]);
80}
81
82static const struct sagaw_bits_tag {
83 int agaw;
84 int cap;
85 int awlvl;
86 int pglvl;
87} sagaw_bits[] = {
88 {.agaw = 30, .cap = DMAR_CAP_SAGAW_2LVL, .awlvl = DMAR_CTX2_AW_2LVL,
89 .pglvl = 2},
90 {.agaw = 39, .cap = DMAR_CAP_SAGAW_3LVL, .awlvl = DMAR_CTX2_AW_3LVL,
91 .pglvl = 3},
92 {.agaw = 48, .cap = DMAR_CAP_SAGAW_4LVL, .awlvl = DMAR_CTX2_AW_4LVL,
93 .pglvl = 4},
94 {.agaw = 57, .cap = DMAR_CAP_SAGAW_5LVL, .awlvl = DMAR_CTX2_AW_5LVL,
95 .pglvl = 5},
96 {.agaw = 64, .cap = DMAR_CAP_SAGAW_6LVL, .awlvl = DMAR_CTX2_AW_6LVL,
97 .pglvl = 6}
98};
99#define SIZEOF_SAGAW_BITS (sizeof(sagaw_bits) / sizeof(sagaw_bits[0]))
100
101bool
102dmar_pglvl_supported(struct dmar_unit *unit, int pglvl)
103{
104 int i;
105
106 for (i = 0; i < SIZEOF_SAGAW_BITS; i++) {
107 if (sagaw_bits[i].pglvl != pglvl)
108 continue;
109 if ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0)
110 return (true);
111 }
112 return (false);
113}
114
115int
116ctx_set_agaw(struct dmar_ctx *ctx, int mgaw)
117{
118 int sagaw, i;
119
120 ctx->mgaw = mgaw;
121 sagaw = DMAR_CAP_SAGAW(ctx->dmar->hw_cap);
122 for (i = 0; i < SIZEOF_SAGAW_BITS; i++) {
123 if (sagaw_bits[i].agaw >= mgaw) {
124 ctx->agaw = sagaw_bits[i].agaw;
125 ctx->pglvl = sagaw_bits[i].pglvl;
126 ctx->awlvl = sagaw_bits[i].awlvl;
127 return (0);
128 }
129 }
130 device_printf(ctx->dmar->dev,
131 "context request mgaw %d for pci%d:%d:%d:%d, "
132 "no agaw found, sagaw %x\n", mgaw, ctx->dmar->segment, ctx->bus,
133 ctx->slot, ctx->func, sagaw);
134 return (EINVAL);
135}
136
137/*
138 * Find a best fit mgaw for the given maxaddr:
139 * - if allow_less is false, must find sagaw which maps all requested
140 * addresses (used by identity mappings);
141 * - if allow_less is true, and no supported sagaw can map all requested
142 * address space, accept the biggest sagaw, whatever is it.
143 */
144int
145dmar_maxaddr2mgaw(struct dmar_unit *unit, dmar_gaddr_t maxaddr, bool allow_less)
146{
147 int i;
148
149 for (i = 0; i < SIZEOF_SAGAW_BITS; i++) {
150 if ((1ULL << sagaw_bits[i].agaw) >= maxaddr &&
151 (DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0)
152 break;
153 }
154 if (allow_less && i == SIZEOF_SAGAW_BITS) {
155 do {
156 i--;
157 } while ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap)
158 == 0);
159 }
160 if (i < SIZEOF_SAGAW_BITS)
161 return (sagaw_bits[i].agaw);
162 KASSERT(0, ("no mgaw for maxaddr %jx allow_less %d",
163 (uintmax_t) maxaddr, allow_less));
164 return (-1);
165}
166
167/*
168 * Calculate the total amount of page table pages needed to map the
169 * whole bus address space on the context with the selected agaw.
170 */
171vm_pindex_t
172pglvl_max_pages(int pglvl)
173{
174 vm_pindex_t res;
175 int i;
176
177 for (res = 0, i = pglvl; i > 0; i--) {
178 res *= DMAR_NPTEPG;
179 res++;
180 }
181 return (res);
182}
183
184/*
185 * Return true if the page table level lvl supports the superpage for
186 * the context ctx.
187 */
188int
189ctx_is_sp_lvl(struct dmar_ctx *ctx, int lvl)
190{
191 int alvl, cap_sps;
192 static const int sagaw_sp[] = {
193 DMAR_CAP_SPS_2M,
194 DMAR_CAP_SPS_1G,
195 DMAR_CAP_SPS_512G,
196 DMAR_CAP_SPS_1T
197 };
198
199 alvl = ctx->pglvl - lvl - 1;
200 cap_sps = DMAR_CAP_SPS(ctx->dmar->hw_cap);
201 return (alvl < sizeof(sagaw_sp) / sizeof(sagaw_sp[0]) &&
202 (sagaw_sp[alvl] & cap_sps) != 0);
203}
204
205dmar_gaddr_t
206pglvl_page_size(int total_pglvl, int lvl)
207{
208 int rlvl;
209 static const dmar_gaddr_t pg_sz[] = {
210 (dmar_gaddr_t)DMAR_PAGE_SIZE,
211 (dmar_gaddr_t)DMAR_PAGE_SIZE << DMAR_NPTEPGSHIFT,
212 (dmar_gaddr_t)DMAR_PAGE_SIZE << (2 * DMAR_NPTEPGSHIFT),
213 (dmar_gaddr_t)DMAR_PAGE_SIZE << (3 * DMAR_NPTEPGSHIFT),
214 (dmar_gaddr_t)DMAR_PAGE_SIZE << (4 * DMAR_NPTEPGSHIFT),
215 (dmar_gaddr_t)DMAR_PAGE_SIZE << (5 * DMAR_NPTEPGSHIFT)
216 };
217
218 KASSERT(lvl >= 0 && lvl < total_pglvl,
219 ("total %d lvl %d", total_pglvl, lvl));
220 rlvl = total_pglvl - lvl - 1;
221 KASSERT(rlvl < sizeof(pg_sz) / sizeof(pg_sz[0]),
222 ("sizeof pg_sz lvl %d", lvl));
223 return (pg_sz[rlvl]);
224}
225
226dmar_gaddr_t
227ctx_page_size(struct dmar_ctx *ctx, int lvl)
228{
229
230 return (pglvl_page_size(ctx->pglvl, lvl));
231}
232
32
33#include <sys/param.h>
34#include <sys/bus.h>
35#include <sys/kernel.h>
36#include <sys/lock.h>
37#include <sys/malloc.h>
38#include <sys/memdesc.h>
39#include <sys/mutex.h>
40#include <sys/proc.h>
41#include <sys/queue.h>
42#include <sys/rman.h>
43#include <sys/rwlock.h>
44#include <sys/sched.h>
45#include <sys/sf_buf.h>
46#include <sys/sysctl.h>
47#include <sys/systm.h>
48#include <sys/taskqueue.h>
49#include <sys/tree.h>
50#include <vm/vm.h>
51#include <vm/vm_extern.h>
52#include <vm/vm_kern.h>
53#include <vm/vm_object.h>
54#include <vm/vm_page.h>
55#include <vm/vm_map.h>
56#include <vm/vm_pageout.h>
57#include <machine/bus.h>
58#include <machine/cpu.h>
59#include <x86/include/busdma_impl.h>
60#include <x86/iommu/intel_reg.h>
61#include <x86/iommu/busdma_dmar.h>
62#include <x86/iommu/intel_dmar.h>
63
64u_int
65dmar_nd2mask(u_int nd)
66{
67 static const u_int masks[] = {
68 0x000f, /* nd == 0 */
69 0x002f, /* nd == 1 */
70 0x00ff, /* nd == 2 */
71 0x02ff, /* nd == 3 */
72 0x0fff, /* nd == 4 */
73 0x2fff, /* nd == 5 */
74 0xffff, /* nd == 6 */
75 0x0000, /* nd == 7 reserved */
76 };
77
78 KASSERT(nd <= 6, ("number of domains %d", nd));
79 return (masks[nd]);
80}
81
82static const struct sagaw_bits_tag {
83 int agaw;
84 int cap;
85 int awlvl;
86 int pglvl;
87} sagaw_bits[] = {
88 {.agaw = 30, .cap = DMAR_CAP_SAGAW_2LVL, .awlvl = DMAR_CTX2_AW_2LVL,
89 .pglvl = 2},
90 {.agaw = 39, .cap = DMAR_CAP_SAGAW_3LVL, .awlvl = DMAR_CTX2_AW_3LVL,
91 .pglvl = 3},
92 {.agaw = 48, .cap = DMAR_CAP_SAGAW_4LVL, .awlvl = DMAR_CTX2_AW_4LVL,
93 .pglvl = 4},
94 {.agaw = 57, .cap = DMAR_CAP_SAGAW_5LVL, .awlvl = DMAR_CTX2_AW_5LVL,
95 .pglvl = 5},
96 {.agaw = 64, .cap = DMAR_CAP_SAGAW_6LVL, .awlvl = DMAR_CTX2_AW_6LVL,
97 .pglvl = 6}
98};
99#define SIZEOF_SAGAW_BITS (sizeof(sagaw_bits) / sizeof(sagaw_bits[0]))
100
101bool
102dmar_pglvl_supported(struct dmar_unit *unit, int pglvl)
103{
104 int i;
105
106 for (i = 0; i < SIZEOF_SAGAW_BITS; i++) {
107 if (sagaw_bits[i].pglvl != pglvl)
108 continue;
109 if ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0)
110 return (true);
111 }
112 return (false);
113}
114
115int
116ctx_set_agaw(struct dmar_ctx *ctx, int mgaw)
117{
118 int sagaw, i;
119
120 ctx->mgaw = mgaw;
121 sagaw = DMAR_CAP_SAGAW(ctx->dmar->hw_cap);
122 for (i = 0; i < SIZEOF_SAGAW_BITS; i++) {
123 if (sagaw_bits[i].agaw >= mgaw) {
124 ctx->agaw = sagaw_bits[i].agaw;
125 ctx->pglvl = sagaw_bits[i].pglvl;
126 ctx->awlvl = sagaw_bits[i].awlvl;
127 return (0);
128 }
129 }
130 device_printf(ctx->dmar->dev,
131 "context request mgaw %d for pci%d:%d:%d:%d, "
132 "no agaw found, sagaw %x\n", mgaw, ctx->dmar->segment, ctx->bus,
133 ctx->slot, ctx->func, sagaw);
134 return (EINVAL);
135}
136
137/*
138 * Find a best fit mgaw for the given maxaddr:
139 * - if allow_less is false, must find sagaw which maps all requested
140 * addresses (used by identity mappings);
141 * - if allow_less is true, and no supported sagaw can map all requested
142 * address space, accept the biggest sagaw, whatever is it.
143 */
144int
145dmar_maxaddr2mgaw(struct dmar_unit *unit, dmar_gaddr_t maxaddr, bool allow_less)
146{
147 int i;
148
149 for (i = 0; i < SIZEOF_SAGAW_BITS; i++) {
150 if ((1ULL << sagaw_bits[i].agaw) >= maxaddr &&
151 (DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0)
152 break;
153 }
154 if (allow_less && i == SIZEOF_SAGAW_BITS) {
155 do {
156 i--;
157 } while ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap)
158 == 0);
159 }
160 if (i < SIZEOF_SAGAW_BITS)
161 return (sagaw_bits[i].agaw);
162 KASSERT(0, ("no mgaw for maxaddr %jx allow_less %d",
163 (uintmax_t) maxaddr, allow_less));
164 return (-1);
165}
166
167/*
168 * Calculate the total amount of page table pages needed to map the
169 * whole bus address space on the context with the selected agaw.
170 */
171vm_pindex_t
172pglvl_max_pages(int pglvl)
173{
174 vm_pindex_t res;
175 int i;
176
177 for (res = 0, i = pglvl; i > 0; i--) {
178 res *= DMAR_NPTEPG;
179 res++;
180 }
181 return (res);
182}
183
184/*
185 * Return true if the page table level lvl supports the superpage for
186 * the context ctx.
187 */
188int
189ctx_is_sp_lvl(struct dmar_ctx *ctx, int lvl)
190{
191 int alvl, cap_sps;
192 static const int sagaw_sp[] = {
193 DMAR_CAP_SPS_2M,
194 DMAR_CAP_SPS_1G,
195 DMAR_CAP_SPS_512G,
196 DMAR_CAP_SPS_1T
197 };
198
199 alvl = ctx->pglvl - lvl - 1;
200 cap_sps = DMAR_CAP_SPS(ctx->dmar->hw_cap);
201 return (alvl < sizeof(sagaw_sp) / sizeof(sagaw_sp[0]) &&
202 (sagaw_sp[alvl] & cap_sps) != 0);
203}
204
205dmar_gaddr_t
206pglvl_page_size(int total_pglvl, int lvl)
207{
208 int rlvl;
209 static const dmar_gaddr_t pg_sz[] = {
210 (dmar_gaddr_t)DMAR_PAGE_SIZE,
211 (dmar_gaddr_t)DMAR_PAGE_SIZE << DMAR_NPTEPGSHIFT,
212 (dmar_gaddr_t)DMAR_PAGE_SIZE << (2 * DMAR_NPTEPGSHIFT),
213 (dmar_gaddr_t)DMAR_PAGE_SIZE << (3 * DMAR_NPTEPGSHIFT),
214 (dmar_gaddr_t)DMAR_PAGE_SIZE << (4 * DMAR_NPTEPGSHIFT),
215 (dmar_gaddr_t)DMAR_PAGE_SIZE << (5 * DMAR_NPTEPGSHIFT)
216 };
217
218 KASSERT(lvl >= 0 && lvl < total_pglvl,
219 ("total %d lvl %d", total_pglvl, lvl));
220 rlvl = total_pglvl - lvl - 1;
221 KASSERT(rlvl < sizeof(pg_sz) / sizeof(pg_sz[0]),
222 ("sizeof pg_sz lvl %d", lvl));
223 return (pg_sz[rlvl]);
224}
225
226dmar_gaddr_t
227ctx_page_size(struct dmar_ctx *ctx, int lvl)
228{
229
230 return (pglvl_page_size(ctx->pglvl, lvl));
231}
232
233int
234calc_am(struct dmar_unit *unit, dmar_gaddr_t base, dmar_gaddr_t size,
235 dmar_gaddr_t *isizep)
236{
237 dmar_gaddr_t isize;
238 int am;
239
240 for (am = DMAR_CAP_MAMV(unit->hw_cap);; am--) {
241 isize = 1ULL << (am + DMAR_PAGE_SHIFT);
242 if ((base & (isize - 1)) == 0 && size >= isize)
243 break;
244 if (am == 0)
245 break;
246 }
247 *isizep = isize;
248 return (am);
249}
250
233dmar_haddr_t dmar_high;
234int haw;
235int dmar_tbl_pagecnt;
236
237vm_page_t
238dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags)
239{
240 vm_page_t m;
241 int zeroed;
242
243 zeroed = (flags & DMAR_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0;
244 for (;;) {
245 if ((flags & DMAR_PGF_OBJL) == 0)
246 VM_OBJECT_WLOCK(obj);
247 m = vm_page_lookup(obj, idx);
248 if ((flags & DMAR_PGF_NOALLOC) != 0 || m != NULL) {
249 if ((flags & DMAR_PGF_OBJL) == 0)
250 VM_OBJECT_WUNLOCK(obj);
251 break;
252 }
253 m = vm_page_alloc_contig(obj, idx, VM_ALLOC_NOBUSY |
254 VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP | zeroed, 1, 0,
255 dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
256 if ((flags & DMAR_PGF_OBJL) == 0)
257 VM_OBJECT_WUNLOCK(obj);
258 if (m != NULL) {
259 if (zeroed && (m->flags & PG_ZERO) == 0)
260 pmap_zero_page(m);
261 atomic_add_int(&dmar_tbl_pagecnt, 1);
262 break;
263 }
264 if ((flags & DMAR_PGF_WAITOK) == 0)
265 break;
266 if ((flags & DMAR_PGF_OBJL) != 0)
267 VM_OBJECT_WUNLOCK(obj);
268 VM_WAIT;
269 if ((flags & DMAR_PGF_OBJL) != 0)
270 VM_OBJECT_WLOCK(obj);
271 }
272 return (m);
273}
274
275void
276dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags)
277{
278 vm_page_t m;
279
280 if ((flags & DMAR_PGF_OBJL) == 0)
281 VM_OBJECT_WLOCK(obj);
282 m = vm_page_lookup(obj, idx);
283 if (m != NULL) {
284 vm_page_free(m);
285 atomic_subtract_int(&dmar_tbl_pagecnt, 1);
286 }
287 if ((flags & DMAR_PGF_OBJL) == 0)
288 VM_OBJECT_WUNLOCK(obj);
289}
290
291void *
292dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
293 struct sf_buf **sf)
294{
295 vm_page_t m;
296 bool allocated;
297
298 if ((flags & DMAR_PGF_OBJL) == 0)
299 VM_OBJECT_WLOCK(obj);
300 m = vm_page_lookup(obj, idx);
301 if (m == NULL && (flags & DMAR_PGF_ALLOC) != 0) {
302 m = dmar_pgalloc(obj, idx, flags | DMAR_PGF_OBJL);
303 allocated = true;
304 } else
305 allocated = false;
306 if (m == NULL) {
307 if ((flags & DMAR_PGF_OBJL) == 0)
308 VM_OBJECT_WUNLOCK(obj);
309 return (NULL);
310 }
311 /* Sleepable allocations cannot fail. */
312 if ((flags & DMAR_PGF_WAITOK) != 0)
313 VM_OBJECT_WUNLOCK(obj);
314 sched_pin();
315 *sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & DMAR_PGF_WAITOK)
316 == 0 ? SFB_NOWAIT : 0));
317 if (*sf == NULL) {
318 sched_unpin();
319 if (allocated) {
320 VM_OBJECT_ASSERT_WLOCKED(obj);
321 dmar_pgfree(obj, m->pindex, flags | DMAR_PGF_OBJL);
322 }
323 if ((flags & DMAR_PGF_OBJL) == 0)
324 VM_OBJECT_WUNLOCK(obj);
325 return (NULL);
326 }
327 if ((flags & (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) ==
328 (DMAR_PGF_WAITOK | DMAR_PGF_OBJL))
329 VM_OBJECT_WLOCK(obj);
330 else if ((flags & (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) == 0)
331 VM_OBJECT_WUNLOCK(obj);
332 return ((void *)sf_buf_kva(*sf));
333}
334
335void
336dmar_unmap_pgtbl(struct sf_buf *sf, bool coherent)
337{
338 vm_page_t m;
339
340 m = sf_buf_page(sf);
341 sf_buf_free(sf);
342 sched_unpin();
343
344 /*
345 * If DMAR does not snoop paging structures accesses, flush
346 * CPU cache to memory.
347 */
348 if (!coherent)
349 pmap_invalidate_cache_pages(&m, 1);
350}
351
352/*
353 * Load the root entry pointer into the hardware, busily waiting for
354 * the completion.
355 */
356int
357dmar_load_root_entry_ptr(struct dmar_unit *unit)
358{
359 vm_page_t root_entry;
360
361 /*
362 * Access to the GCMD register must be serialized while the
363 * command is submitted.
364 */
365 DMAR_ASSERT_LOCKED(unit);
366
367 /* VM_OBJECT_RLOCK(unit->ctx_obj); */
368 VM_OBJECT_WLOCK(unit->ctx_obj);
369 root_entry = vm_page_lookup(unit->ctx_obj, 0);
370 /* VM_OBJECT_RUNLOCK(unit->ctx_obj); */
371 VM_OBJECT_WUNLOCK(unit->ctx_obj);
372 dmar_write8(unit, DMAR_RTADDR_REG, VM_PAGE_TO_PHYS(root_entry));
373 dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_SRTP);
374 /* XXXKIB should have a timeout */
375 while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_RTPS) == 0)
376 cpu_spinwait();
377 return (0);
378}
379
380/*
381 * Globally invalidate the context entries cache, busily waiting for
382 * the completion.
383 */
384int
385dmar_inv_ctx_glob(struct dmar_unit *unit)
386{
387
388 /*
389 * Access to the CCMD register must be serialized while the
390 * command is submitted.
391 */
392 DMAR_ASSERT_LOCKED(unit);
251dmar_haddr_t dmar_high;
252int haw;
253int dmar_tbl_pagecnt;
254
255vm_page_t
256dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags)
257{
258 vm_page_t m;
259 int zeroed;
260
261 zeroed = (flags & DMAR_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0;
262 for (;;) {
263 if ((flags & DMAR_PGF_OBJL) == 0)
264 VM_OBJECT_WLOCK(obj);
265 m = vm_page_lookup(obj, idx);
266 if ((flags & DMAR_PGF_NOALLOC) != 0 || m != NULL) {
267 if ((flags & DMAR_PGF_OBJL) == 0)
268 VM_OBJECT_WUNLOCK(obj);
269 break;
270 }
271 m = vm_page_alloc_contig(obj, idx, VM_ALLOC_NOBUSY |
272 VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP | zeroed, 1, 0,
273 dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
274 if ((flags & DMAR_PGF_OBJL) == 0)
275 VM_OBJECT_WUNLOCK(obj);
276 if (m != NULL) {
277 if (zeroed && (m->flags & PG_ZERO) == 0)
278 pmap_zero_page(m);
279 atomic_add_int(&dmar_tbl_pagecnt, 1);
280 break;
281 }
282 if ((flags & DMAR_PGF_WAITOK) == 0)
283 break;
284 if ((flags & DMAR_PGF_OBJL) != 0)
285 VM_OBJECT_WUNLOCK(obj);
286 VM_WAIT;
287 if ((flags & DMAR_PGF_OBJL) != 0)
288 VM_OBJECT_WLOCK(obj);
289 }
290 return (m);
291}
292
293void
294dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags)
295{
296 vm_page_t m;
297
298 if ((flags & DMAR_PGF_OBJL) == 0)
299 VM_OBJECT_WLOCK(obj);
300 m = vm_page_lookup(obj, idx);
301 if (m != NULL) {
302 vm_page_free(m);
303 atomic_subtract_int(&dmar_tbl_pagecnt, 1);
304 }
305 if ((flags & DMAR_PGF_OBJL) == 0)
306 VM_OBJECT_WUNLOCK(obj);
307}
308
309void *
310dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
311 struct sf_buf **sf)
312{
313 vm_page_t m;
314 bool allocated;
315
316 if ((flags & DMAR_PGF_OBJL) == 0)
317 VM_OBJECT_WLOCK(obj);
318 m = vm_page_lookup(obj, idx);
319 if (m == NULL && (flags & DMAR_PGF_ALLOC) != 0) {
320 m = dmar_pgalloc(obj, idx, flags | DMAR_PGF_OBJL);
321 allocated = true;
322 } else
323 allocated = false;
324 if (m == NULL) {
325 if ((flags & DMAR_PGF_OBJL) == 0)
326 VM_OBJECT_WUNLOCK(obj);
327 return (NULL);
328 }
329 /* Sleepable allocations cannot fail. */
330 if ((flags & DMAR_PGF_WAITOK) != 0)
331 VM_OBJECT_WUNLOCK(obj);
332 sched_pin();
333 *sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & DMAR_PGF_WAITOK)
334 == 0 ? SFB_NOWAIT : 0));
335 if (*sf == NULL) {
336 sched_unpin();
337 if (allocated) {
338 VM_OBJECT_ASSERT_WLOCKED(obj);
339 dmar_pgfree(obj, m->pindex, flags | DMAR_PGF_OBJL);
340 }
341 if ((flags & DMAR_PGF_OBJL) == 0)
342 VM_OBJECT_WUNLOCK(obj);
343 return (NULL);
344 }
345 if ((flags & (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) ==
346 (DMAR_PGF_WAITOK | DMAR_PGF_OBJL))
347 VM_OBJECT_WLOCK(obj);
348 else if ((flags & (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) == 0)
349 VM_OBJECT_WUNLOCK(obj);
350 return ((void *)sf_buf_kva(*sf));
351}
352
353void
354dmar_unmap_pgtbl(struct sf_buf *sf, bool coherent)
355{
356 vm_page_t m;
357
358 m = sf_buf_page(sf);
359 sf_buf_free(sf);
360 sched_unpin();
361
362 /*
363 * If DMAR does not snoop paging structures accesses, flush
364 * CPU cache to memory.
365 */
366 if (!coherent)
367 pmap_invalidate_cache_pages(&m, 1);
368}
369
370/*
371 * Load the root entry pointer into the hardware, busily waiting for
372 * the completion.
373 */
374int
375dmar_load_root_entry_ptr(struct dmar_unit *unit)
376{
377 vm_page_t root_entry;
378
379 /*
380 * Access to the GCMD register must be serialized while the
381 * command is submitted.
382 */
383 DMAR_ASSERT_LOCKED(unit);
384
385 /* VM_OBJECT_RLOCK(unit->ctx_obj); */
386 VM_OBJECT_WLOCK(unit->ctx_obj);
387 root_entry = vm_page_lookup(unit->ctx_obj, 0);
388 /* VM_OBJECT_RUNLOCK(unit->ctx_obj); */
389 VM_OBJECT_WUNLOCK(unit->ctx_obj);
390 dmar_write8(unit, DMAR_RTADDR_REG, VM_PAGE_TO_PHYS(root_entry));
391 dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_SRTP);
392 /* XXXKIB should have a timeout */
393 while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_RTPS) == 0)
394 cpu_spinwait();
395 return (0);
396}
397
398/*
399 * Globally invalidate the context entries cache, busily waiting for
400 * the completion.
401 */
402int
403dmar_inv_ctx_glob(struct dmar_unit *unit)
404{
405
406 /*
407 * Access to the CCMD register must be serialized while the
408 * command is submitted.
409 */
410 DMAR_ASSERT_LOCKED(unit);
411 KASSERT(!unit->qi_enabled, ("QI enabled"));
393
394 /*
395 * The DMAR_CCMD_ICC bit in the upper dword should be written
396 * after the low dword write is completed. Amd64
397 * dmar_write8() does not have this issue, i386 dmar_write8()
398 * writes the upper dword last.
399 */
400 dmar_write8(unit, DMAR_CCMD_REG, DMAR_CCMD_ICC | DMAR_CCMD_CIRG_GLOB);
401 /* XXXKIB should have a timeout */
402 while ((dmar_read4(unit, DMAR_CCMD_REG + 4) & DMAR_CCMD_ICC32) != 0)
403 cpu_spinwait();
404 return (0);
405}
406
407/*
408 * Globally invalidate the IOTLB, busily waiting for the completion.
409 */
410int
411dmar_inv_iotlb_glob(struct dmar_unit *unit)
412{
413 int reg;
414
415 DMAR_ASSERT_LOCKED(unit);
412
413 /*
414 * The DMAR_CCMD_ICC bit in the upper dword should be written
415 * after the low dword write is completed. Amd64
416 * dmar_write8() does not have this issue, i386 dmar_write8()
417 * writes the upper dword last.
418 */
419 dmar_write8(unit, DMAR_CCMD_REG, DMAR_CCMD_ICC | DMAR_CCMD_CIRG_GLOB);
420 /* XXXKIB should have a timeout */
421 while ((dmar_read4(unit, DMAR_CCMD_REG + 4) & DMAR_CCMD_ICC32) != 0)
422 cpu_spinwait();
423 return (0);
424}
425
426/*
427 * Globally invalidate the IOTLB, busily waiting for the completion.
428 */
429int
430dmar_inv_iotlb_glob(struct dmar_unit *unit)
431{
432 int reg;
433
434 DMAR_ASSERT_LOCKED(unit);
435 KASSERT(!unit->qi_enabled, ("QI enabled"));
416
417 reg = 16 * DMAR_ECAP_IRO(unit->hw_ecap);
418 /* See a comment about DMAR_CCMD_ICC in dmar_inv_ctx_glob. */
419 dmar_write8(unit, reg + DMAR_IOTLB_REG_OFF, DMAR_IOTLB_IVT |
420 DMAR_IOTLB_IIRG_GLB | DMAR_IOTLB_DR | DMAR_IOTLB_DW);
421 /* XXXKIB should have a timeout */
422 while ((dmar_read4(unit, reg + DMAR_IOTLB_REG_OFF + 4) &
423 DMAR_IOTLB_IVT32) != 0)
424 cpu_spinwait();
425 return (0);
426}
427
428/*
429 * Flush the chipset write buffers. See 11.1 "Write Buffer Flushing"
430 * in the architecture specification.
431 */
432int
433dmar_flush_write_bufs(struct dmar_unit *unit)
434{
435
436 DMAR_ASSERT_LOCKED(unit);
437
438 /*
439 * DMAR_GCMD_WBF is only valid when CAP_RWBF is reported.
440 */
441 KASSERT((unit->hw_cap & DMAR_CAP_RWBF) != 0,
442 ("dmar%d: no RWBF", unit->unit));
443
444 dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_WBF);
445 /* XXXKIB should have a timeout */
446 while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_WBFS) == 0)
447 cpu_spinwait();
448 return (0);
449}
450
451int
452dmar_enable_translation(struct dmar_unit *unit)
453{
454
455 DMAR_ASSERT_LOCKED(unit);
456 unit->hw_gcmd |= DMAR_GCMD_TE;
457 dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
458 /* XXXKIB should have a timeout */
459 while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES) == 0)
460 cpu_spinwait();
461 return (0);
462}
463
464int
465dmar_disable_translation(struct dmar_unit *unit)
466{
467
468 DMAR_ASSERT_LOCKED(unit);
469 unit->hw_gcmd &= ~DMAR_GCMD_TE;
470 dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
471 /* XXXKIB should have a timeout */
472 while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES) != 0)
473 cpu_spinwait();
474 return (0);
475}
476
436
437 reg = 16 * DMAR_ECAP_IRO(unit->hw_ecap);
438 /* See a comment about DMAR_CCMD_ICC in dmar_inv_ctx_glob. */
439 dmar_write8(unit, reg + DMAR_IOTLB_REG_OFF, DMAR_IOTLB_IVT |
440 DMAR_IOTLB_IIRG_GLB | DMAR_IOTLB_DR | DMAR_IOTLB_DW);
441 /* XXXKIB should have a timeout */
442 while ((dmar_read4(unit, reg + DMAR_IOTLB_REG_OFF + 4) &
443 DMAR_IOTLB_IVT32) != 0)
444 cpu_spinwait();
445 return (0);
446}
447
448/*
449 * Flush the chipset write buffers. See 11.1 "Write Buffer Flushing"
450 * in the architecture specification.
451 */
452int
453dmar_flush_write_bufs(struct dmar_unit *unit)
454{
455
456 DMAR_ASSERT_LOCKED(unit);
457
458 /*
459 * DMAR_GCMD_WBF is only valid when CAP_RWBF is reported.
460 */
461 KASSERT((unit->hw_cap & DMAR_CAP_RWBF) != 0,
462 ("dmar%d: no RWBF", unit->unit));
463
464 dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_WBF);
465 /* XXXKIB should have a timeout */
466 while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_WBFS) == 0)
467 cpu_spinwait();
468 return (0);
469}
470
471int
472dmar_enable_translation(struct dmar_unit *unit)
473{
474
475 DMAR_ASSERT_LOCKED(unit);
476 unit->hw_gcmd |= DMAR_GCMD_TE;
477 dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
478 /* XXXKIB should have a timeout */
479 while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES) == 0)
480 cpu_spinwait();
481 return (0);
482}
483
484int
485dmar_disable_translation(struct dmar_unit *unit)
486{
487
488 DMAR_ASSERT_LOCKED(unit);
489 unit->hw_gcmd &= ~DMAR_GCMD_TE;
490 dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
491 /* XXXKIB should have a timeout */
492 while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES) != 0)
493 cpu_spinwait();
494 return (0);
495}
496
477void
478dmar_enable_intr(struct dmar_unit *unit)
479{
480 uint32_t fectl;
481
482 fectl = dmar_read4(unit, DMAR_FECTL_REG);
483 fectl &= ~DMAR_FECTL_IM;
484 dmar_write4(unit, DMAR_FECTL_REG, fectl);
485}
486
487void
488dmar_disable_intr(struct dmar_unit *unit)
489{
490 uint32_t fectl;
491
492 fectl = dmar_read4(unit, DMAR_FECTL_REG);
493 dmar_write4(unit, DMAR_FECTL_REG, fectl | DMAR_FECTL_IM);
494}
495
496#define BARRIER_F \
497 u_int f_done, f_inproc, f_wakeup; \
498 \
499 f_done = 1 << (barrier_id * 3); \
500 f_inproc = 1 << (barrier_id * 3 + 1); \
501 f_wakeup = 1 << (barrier_id * 3 + 2)
502
503bool
504dmar_barrier_enter(struct dmar_unit *dmar, u_int barrier_id)
505{
506 BARRIER_F;
507
508 DMAR_LOCK(dmar);
509 if ((dmar->barrier_flags & f_done) != 0) {
510 DMAR_UNLOCK(dmar);
511 return (false);
512 }
513
514 if ((dmar->barrier_flags & f_inproc) != 0) {
515 while ((dmar->barrier_flags & f_inproc) != 0) {
516 dmar->barrier_flags |= f_wakeup;
517 msleep(&dmar->barrier_flags, &dmar->lock, 0,
518 "dmarb", 0);
519 }
520 KASSERT((dmar->barrier_flags & f_done) != 0,
521 ("dmar%d barrier %d missing done", dmar->unit, barrier_id));
522 DMAR_UNLOCK(dmar);
523 return (false);
524 }
525
526 dmar->barrier_flags |= f_inproc;
527 DMAR_UNLOCK(dmar);
528 return (true);
529}
530
531void
532dmar_barrier_exit(struct dmar_unit *dmar, u_int barrier_id)
533{
534 BARRIER_F;
535
536 DMAR_ASSERT_LOCKED(dmar);
537 KASSERT((dmar->barrier_flags & (f_done | f_inproc)) == f_inproc,
538 ("dmar%d barrier %d missed entry", dmar->unit, barrier_id));
539 dmar->barrier_flags |= f_done;
540 if ((dmar->barrier_flags & f_wakeup) != 0)
541 wakeup(&dmar->barrier_flags);
542 dmar->barrier_flags &= ~(f_inproc | f_wakeup);
543 DMAR_UNLOCK(dmar);
544}
545
546int dmar_match_verbose;
547
548static SYSCTL_NODE(_hw, OID_AUTO, dmar, CTLFLAG_RD, NULL,
549 "");
550SYSCTL_INT(_hw_dmar, OID_AUTO, tbl_pagecnt, CTLFLAG_RD | CTLFLAG_TUN,
551 &dmar_tbl_pagecnt, 0,
552 "Count of pages used for DMAR pagetables");
553SYSCTL_INT(_hw_dmar, OID_AUTO, match_verbose, CTLFLAG_RW | CTLFLAG_TUN,
554 &dmar_match_verbose, 0,
555 "Verbose matching of the PCI devices to DMAR paths");
556#ifdef INVARIANTS
557int dmar_check_free;
558SYSCTL_INT(_hw_dmar, OID_AUTO, check_free, CTLFLAG_RW | CTLFLAG_TUN,
559 &dmar_check_free, 0,
560 "Check the GPA RBtree for free_down and free_after validity");
561#endif
562
/*
 * Declare and initialize the three flag bits that belong to the
 * barrier identified by the variable barrier_id, which must be in
 * scope at the point of use.  Each barrier owns three consecutive
 * bits: "done", "in progress" and "wakeup requested".  The shifted
 * constant is unsigned so that using the topmost bit of the flags
 * word does not overflow a signed int.
 */
#define	BARRIER_F				\
	u_int f_done, f_inproc, f_wakeup;	\
						\
	f_done = 1u << (barrier_id * 3);	\
	f_inproc = 1u << (barrier_id * 3 + 1);	\
	f_wakeup = 1u << (barrier_id * 3 + 2)
503
504bool
505dmar_barrier_enter(struct dmar_unit *dmar, u_int barrier_id)
506{
507 BARRIER_F;
508
509 DMAR_LOCK(dmar);
510 if ((dmar->barrier_flags & f_done) != 0) {
511 DMAR_UNLOCK(dmar);
512 return (false);
513 }
514
515 if ((dmar->barrier_flags & f_inproc) != 0) {
516 while ((dmar->barrier_flags & f_inproc) != 0) {
517 dmar->barrier_flags |= f_wakeup;
518 msleep(&dmar->barrier_flags, &dmar->lock, 0,
519 "dmarb", 0);
520 }
521 KASSERT((dmar->barrier_flags & f_done) != 0,
522 ("dmar%d barrier %d missing done", dmar->unit, barrier_id));
523 DMAR_UNLOCK(dmar);
524 return (false);
525 }
526
527 dmar->barrier_flags |= f_inproc;
528 DMAR_UNLOCK(dmar);
529 return (true);
530}
531
532void
533dmar_barrier_exit(struct dmar_unit *dmar, u_int barrier_id)
534{
535 BARRIER_F;
536
537 DMAR_ASSERT_LOCKED(dmar);
538 KASSERT((dmar->barrier_flags & (f_done | f_inproc)) == f_inproc,
539 ("dmar%d barrier %d missed entry", dmar->unit, barrier_id));
540 dmar->barrier_flags |= f_done;
541 if ((dmar->barrier_flags & f_wakeup) != 0)
542 wakeup(&dmar->barrier_flags);
543 dmar->barrier_flags &= ~(f_inproc | f_wakeup);
544 DMAR_UNLOCK(dmar);
545}
546
547int dmar_match_verbose;
548
549static SYSCTL_NODE(_hw, OID_AUTO, dmar, CTLFLAG_RD, NULL,
550 "");
551SYSCTL_INT(_hw_dmar, OID_AUTO, tbl_pagecnt, CTLFLAG_RD | CTLFLAG_TUN,
552 &dmar_tbl_pagecnt, 0,
553 "Count of pages used for DMAR pagetables");
554SYSCTL_INT(_hw_dmar, OID_AUTO, match_verbose, CTLFLAG_RW | CTLFLAG_TUN,
555 &dmar_match_verbose, 0,
556 "Verbose matching of the PCI devices to DMAR paths");
557#ifdef INVARIANTS
558int dmar_check_free;
559SYSCTL_INT(_hw_dmar, OID_AUTO, check_free, CTLFLAG_RW | CTLFLAG_TUN,
560 &dmar_check_free, 0,
561 "Check the GPA RBtree for free_down and free_after validity");
562#endif
563