1/*	$NetBSD: x86_pte_tester.c,v 1.3 2022/08/21 14:06:42 mlelstv Exp $	*/
2
3/*
4 * Copyright (c) 2016 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#define __HAVE_DIRECT_MAP
30#define __HAVE_PCPU_AREA
31#define SVS
32
33#include <sys/cdefs.h>
34#include <sys/param.h>
35#include <sys/module.h>
36#include <sys/proc.h>
37#include <sys/sysctl.h>
38#include <uvm/uvm.h>
39#include <x86/pmap.h>
40
41#if defined(__x86_64__)
42# include <amd64/pmap.h>
43# include <amd64/pmap_private.h>
44# define NLEVEL 4
45#else
46# error "Unsupported configuration"
47#endif
48
/*
 * Global tester state.
 *
 * levels[] holds one reserved kernel VA per paging level, used as a
 * scratch window into which the page currently being scanned is
 * temporarily mapped.  coord records the slot indices of the walk in
 * progress, and results accumulates the per-rule counters that are
 * copied out to userland via sysctl.
 */
static struct {
	struct sysctllog *ctx_sysctllog;	/* sysctl tree, for teardown */
	vaddr_t levels[NLEVEL];			/* scratch VAs, one per level */
	struct {
		/* Current slot index at each level of the walk. */
		size_t l4;
		size_t l3;
		size_t l2;
		size_t l1;
	} coord;
	struct {
		size_t n_rwx;		/* kernel pages mapped RWX */
		size_t n_shstk;		/* kernel shadow-stack-like pages */
		bool kernel_map_with_low_ptes;	/* kernel map has low-half PTEs */
		bool pte_is_user_accessible;	/* PTE slot has PTE_U set */
		size_t n_user_space_is_kernel;	/* low-half entries without PTE_U */
		size_t n_kernel_space_is_user;	/* high-half entries with PTE_U */
		size_t n_svs_g_bit_set;	/* SVS G bits outside the PCPU slot */
	} results;
} tester_ctx;
68
/* Verdict returned by a walk callback; controls how the scan proceeds. */
typedef enum {
	WALK_NEXT, /* go to the next level */
	WALK_SKIP, /* skip the next level, but keep iterating on the current one */
	WALK_STOP  /* stop the iteration on the current level */
} walk_type;
74
75/* -------------------------------------------------------------------------- */
76
/*
 * PTE/PDE accessors.  Every macro argument is fully parenthesized so
 * that compound expressions such as is_flag(e, A | B) expand with the
 * intended precedence ('&' binds tighter than '|').
 */
#define is_flag(__ent, __flag)	(((__ent) & (__flag)) != 0)
#define is_valid(__ent)		is_flag(__ent, PTE_P)
#define get_pa(__pde)		((__pde) & PTE_FRAME)

/* Number of entries in one page worth of page-table entries, per level. */
#define L4_MAX_NENTRIES (PAGE_SIZE / sizeof(pd_entry_t))
#define L3_MAX_NENTRIES (PAGE_SIZE / sizeof(pd_entry_t))
#define L2_MAX_NENTRIES (PAGE_SIZE / sizeof(pd_entry_t))
#define L1_MAX_NENTRIES (PAGE_SIZE / sizeof(pd_entry_t))
85
86static void
87scan_l1(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl))
88{
89	pd_entry_t *pd = (pd_entry_t *)tester_ctx.levels[0];
90	size_t i;
91
92	pmap_kenter_pa(tester_ctx.levels[0], pa, VM_PROT_READ, 0);
93	pmap_update(pmap_kernel());
94
95	for (i = 0; i < L1_MAX_NENTRIES; i++) {
96		tester_ctx.coord.l1 = i;
97		if (is_valid(pd[i])) {
98			fn(pd[i], i, 1);
99		}
100	}
101
102	pmap_kremove(tester_ctx.levels[0], PAGE_SIZE);
103	pmap_update(pmap_kernel());
104}
105
106static void
107scan_l2(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl))
108{
109	pd_entry_t *pd = (pd_entry_t *)tester_ctx.levels[1];
110	walk_type ret;
111	size_t i;
112
113	pmap_kenter_pa(tester_ctx.levels[1], pa, VM_PROT_READ, 0);
114	pmap_update(pmap_kernel());
115
116	for (i = 0; i < L2_MAX_NENTRIES; i++) {
117		tester_ctx.coord.l2 = i;
118		if (!is_valid(pd[i]))
119			continue;
120		ret = fn(pd[i], i, 2);
121		if (ret == WALK_STOP)
122			break;
123		if (is_flag(pd[i], PTE_PS))
124			continue;
125		if (ret == WALK_NEXT)
126			scan_l1(get_pa(pd[i]), fn);
127	}
128
129	pmap_kremove(tester_ctx.levels[1], PAGE_SIZE);
130	pmap_update(pmap_kernel());
131}
132
133static void
134scan_l3(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl))
135{
136	pd_entry_t *pd = (pd_entry_t *)tester_ctx.levels[2];
137	walk_type ret;
138	size_t i;
139
140	pmap_kenter_pa(tester_ctx.levels[2], pa, VM_PROT_READ, 0);
141	pmap_update(pmap_kernel());
142
143	for (i = 0; i < L3_MAX_NENTRIES; i++) {
144		tester_ctx.coord.l3 = i;
145		if (!is_valid(pd[i]))
146			continue;
147		ret = fn(pd[i], i, 3);
148		if (ret == WALK_STOP)
149			break;
150		if (is_flag(pd[i], PTE_PS))
151			continue;
152		if (ret == WALK_NEXT)
153			scan_l2(get_pa(pd[i]), fn);
154	}
155
156	pmap_kremove(tester_ctx.levels[2], PAGE_SIZE);
157	pmap_update(pmap_kernel());
158}
159
160static void
161scan_l4(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl))
162{
163	pd_entry_t *pd = (pd_entry_t *)tester_ctx.levels[3];
164	walk_type ret;
165	size_t i;
166
167	pmap_kenter_pa(tester_ctx.levels[3], pa, VM_PROT_READ, 0);
168	pmap_update(pmap_kernel());
169
170	for (i = 0; i < L4_MAX_NENTRIES; i++) {
171		tester_ctx.coord.l4 = i;
172		if (!is_valid(pd[i]))
173			continue;
174		ret = fn(pd[i], i, 4);
175		if (ret == WALK_STOP)
176			break;
177		if (is_flag(pd[i], PTE_PS))
178			continue;
179		if (ret == WALK_NEXT)
180			scan_l3(get_pa(pd[i]), fn);
181	}
182
183	pmap_kremove(tester_ctx.levels[3], PAGE_SIZE);
184	pmap_update(pmap_kernel());
185}
186
/*
 * Walk the whole 4-level page tree rooted at physical address "pa",
 * invoking "fn" on every valid entry encountered.
 */
static void
scan_tree(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl))
{
	scan_l4(pa, fn);
}
192
193/* -------------------------------------------------------------------------- */
194
195/*
196 * Rule: the number of kernel RWX pages should be zero.
197 */
198static walk_type
199count_krwx(pd_entry_t pde, size_t slot, int lvl)
200{
201	if (lvl == NLEVEL && slot < 256) {
202		return WALK_SKIP;
203	}
204	if (is_flag(pde, PTE_NX) || !is_flag(pde, PTE_W)) {
205		return WALK_SKIP;
206	}
207	if (lvl != 1 && !is_flag(pde, PTE_PS)) {
208		return WALK_NEXT;
209	}
210
211	if (lvl == 4) {
212		tester_ctx.results.n_rwx += (NBPD_L4 / PAGE_SIZE);
213	} else if (lvl == 3) {
214		tester_ctx.results.n_rwx += (NBPD_L3 / PAGE_SIZE);
215	} else if (lvl == 2) {
216		tester_ctx.results.n_rwx += (NBPD_L2 / PAGE_SIZE);
217	} else if (lvl == 1) {
218		tester_ctx.results.n_rwx += (NBPD_L1 / PAGE_SIZE);
219	}
220
221	return WALK_NEXT;
222}
223
224/*
225 * Rule: the number of kernel SHSTK pages should be zero.
226 */
227static walk_type
228count_kshstk(pd_entry_t pde, size_t slot, int lvl)
229{
230	if (lvl == NLEVEL && slot < 256) {
231		return WALK_SKIP;
232	}
233
234	if (is_flag(pde, PTE_PS) || lvl == 1) {
235		if (!is_flag(pde, PTE_W) && is_flag(pde, PTE_D)) {
236			if (lvl == 4) {
237				tester_ctx.results.n_shstk += (NBPD_L4 / PAGE_SIZE);
238			} else if (lvl == 3) {
239				tester_ctx.results.n_shstk += (NBPD_L3 / PAGE_SIZE);
240			} else if (lvl == 2) {
241				tester_ctx.results.n_shstk += (NBPD_L2 / PAGE_SIZE);
242			} else if (lvl == 1) {
243				tester_ctx.results.n_shstk += (NBPD_L1 / PAGE_SIZE);
244			}
245		}
246		return WALK_SKIP;
247	}
248
249	if (!is_flag(pde, PTE_W)) {
250		return WALK_SKIP;
251	}
252
253	return WALK_NEXT;
254}
255
256/*
257 * Rule: the lower half of the kernel map must be zero.
258 */
259static walk_type
260check_kernel_map(pd_entry_t pde, size_t slot, int lvl)
261{
262	if (lvl != NLEVEL) {
263		return WALK_STOP;
264	}
265	if (slot >= 256) {
266		return WALK_SKIP;
267	}
268	if (pde != 0) {
269		tester_ctx.results.kernel_map_with_low_ptes |= true;
270	}
271	return WALK_SKIP;
272}
273
274/*
275 * Rule: the PTE space must not have user permissions.
276 */
277static walk_type
278check_pte_space(pd_entry_t pde, size_t slot, int lvl)
279{
280	if (lvl != NLEVEL) {
281		return WALK_STOP;
282	}
283	if (slot != PDIR_SLOT_PTE) {
284		return WALK_SKIP;
285	}
286	if (is_flag(pde, PTE_U)) {
287		tester_ctx.results.pte_is_user_accessible |= true;
288	}
289	return WALK_SKIP;
290}
291
292/*
293 * Rule: each page in the lower half must have user permissions.
294 */
295static walk_type
296check_user_space(pd_entry_t pde, size_t slot, int lvl)
297{
298	if (lvl == NLEVEL && slot >= 256) {
299		return WALK_SKIP;
300	}
301	if (!is_flag(pde, PTE_U)) {
302		tester_ctx.results.n_user_space_is_kernel += 1;
303		return WALK_SKIP;
304	}
305	return WALK_NEXT;
306}
307
308/*
309 * Rule: each page in the higher half must have kernel permissions.
310 */
311static walk_type
312check_kernel_space(pd_entry_t pde, size_t slot, int lvl)
313{
314	if (lvl == NLEVEL && slot < 256) {
315		return WALK_SKIP;
316	}
317	if (lvl == NLEVEL && slot == PDIR_SLOT_PTE) {
318		return WALK_SKIP;
319	}
320	if (is_flag(pde, PTE_U)) {
321		tester_ctx.results.n_kernel_space_is_user += 1;
322		return WALK_SKIP;
323	}
324	return WALK_NEXT;
325}
326
327/*
328 * Rule: the SVS map is allowed to use the G bit only on the PCPU area.
329 */
330static walk_type
331check_svs_g_bit(pd_entry_t pde, size_t slot, int lvl)
332{
333	if (lvl == NLEVEL && slot == PDIR_SLOT_PCPU) {
334		return WALK_SKIP;
335	}
336	if (is_flag(pde, PTE_G)) {
337		tester_ctx.results.n_svs_g_bit_set += 1;
338		return WALK_SKIP;
339	}
340	return WALK_NEXT;
341}
342
343/* -------------------------------------------------------------------------- */
344
/*
 * Scan the SVS (kernel page table isolation) user page tree of the
 * current CPU.  Preemption is disabled so curcpu() stays valid across
 * the scans.  When SVS is disabled, the G-bit counter is set to the
 * sentinel (size_t)-1 (== SIZE_MAX) so userland can tell "not
 * applicable" apart from "zero hits".
 */
static void
scan_svs(void)
{
	extern bool svs_enabled;
	paddr_t pa0;

	if (!svs_enabled) {
		/* Deliberate sentinel: wraps to SIZE_MAX. */
		tester_ctx.results.n_svs_g_bit_set = -1;
		return;
	}

	kpreempt_disable();
	pa0 = curcpu()->ci_svs_updirpa;
	scan_tree(pa0, &check_user_space);
	scan_tree(pa0, &check_kernel_space);
	scan_tree(pa0, &check_svs_g_bit);
	kpreempt_enable();
}
363
/*
 * Scan the page tree of process "p": user/kernel permission separation
 * and the recursive PTE slot.  The pmap lock is held across the scans
 * so the tree cannot be torn down underneath us, and preemption is
 * disabled while the tree is being walked through the scratch windows.
 */
static void
scan_proc(struct proc *p)
{
	struct pmap *pmap = p->p_vmspace->vm_map.pmap;
	paddr_t pa0;

	mutex_enter(&pmap->pm_lock);

	kpreempt_disable();
	pa0 = (paddr_t)pmap->pm_pdirpa[0];
	scan_tree(pa0, &check_user_space);
	scan_tree(pa0, &check_kernel_space);
	scan_tree(pa0, &check_pte_space);
	kpreempt_enable();

	mutex_exit(&pmap->pm_lock);
}
381
/*
 * Run all the scans: the current process's page tree, the SVS mapping
 * of the current CPU, and the kernel map.  Results are accumulated in
 * tester_ctx.results, which is zeroed first.
 */
static void
x86_pte_run_scans(void)
{
	struct pmap *kpm = pmap_kernel();
	paddr_t pa0;

	memset(&tester_ctx.results, 0, sizeof(tester_ctx.results));

	/* Scan the current user process. */
	scan_proc(curproc);

	/* Scan the SVS mapping. */
	scan_svs();

	/* Scan the kernel map. */
	pa0 = (paddr_t)kpm->pm_pdirpa[0];
	scan_tree(pa0, &count_krwx);
	scan_tree(pa0, &count_kshstk);
	scan_tree(pa0, &check_kernel_map);
}
402
/*
 * Reserve one page of kernel VA per paging level; these are the
 * scratch windows the scan_l* functions map physical pages into.
 *
 * NOTE(review): uvm_km_alloc() results are not checked here — if the
 * allocation can fail, a later pmap_kenter_pa() would be handed a
 * bogus VA.  Confirm whether failure is possible in this context.
 */
static void
x86_pte_levels_init(void)
{
	size_t i;
	for (i = 0; i < NLEVEL; i++) {
		tester_ctx.levels[i] = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
		    UVM_KMF_VAONLY);
	}
}
412
/*
 * Release the scratch VA windows reserved by x86_pte_levels_init().
 */
static void
x86_pte_levels_destroy(void)
{
	size_t i;
	for (i = 0; i < NLEVEL; i++) {
		uvm_km_free(kernel_map, tester_ctx.levels[i], PAGE_SIZE,
		    UVM_KMF_VAONLY);
	}
}
422
423/* -------------------------------------------------------------------------- */
424
/*
 * Sysctl handler: run all the scans, then copy the results structure
 * out to userland.  Follows the usual sysctl read protocol: a NULL
 * oldp query reports the required buffer size; an undersized buffer
 * yields ENOMEM.
 */
static int
x86_pte_sysctl_run(SYSCTLFN_ARGS)
{
	if (oldlenp == NULL)
		return EINVAL;

	x86_pte_run_scans();

	if (oldp == NULL) {
		/* Size probe only. */
		*oldlenp = sizeof(tester_ctx.results);
		return 0;
	}

	if (*oldlenp < sizeof(tester_ctx.results))
		return ENOMEM;

	return copyout(&tester_ctx.results, oldp, sizeof(tester_ctx.results));
}
443
444static int
445x86_pte_sysctl_init(void)
446{
447	struct sysctllog **log = &tester_ctx.ctx_sysctllog;
448	const struct sysctlnode *rnode, *cnode;
449	int error;
450
451	error = sysctl_createv(log, 0, NULL, &rnode, CTLFLAG_PERMANENT,
452	    CTLTYPE_NODE, "x86_pte_test",
453	    SYSCTL_DESCR("x86_pte testing interface"),
454	    NULL, 0, NULL, 0, CTL_KERN, CTL_CREATE, CTL_EOL);
455	if (error)
456		goto out;
457
458	error = sysctl_createv(log, 0, &rnode, &cnode, CTLFLAG_PERMANENT,
459	    CTLTYPE_STRUCT, "test",
460	    SYSCTL_DESCR("execute a x86_pte test"),
461	    x86_pte_sysctl_run, 0, NULL, 0, CTL_CREATE, CTL_EOL);
462
463out:
464 	if (error)
465		sysctl_teardown(log);
466	return error;
467}
468
/*
 * Tear down the sysctl tree created by x86_pte_sysctl_init().
 */
static void
x86_pte_sysctl_destroy(void)
{
	sysctl_teardown(&tester_ctx.ctx_sysctllog);
}
474
475/* -------------------------------------------------------------------------- */
476
477MODULE(MODULE_CLASS_MISC, x86_pte_tester, NULL);
478
479static int
480x86_pte_tester_modcmd(modcmd_t cmd, void *arg __unused)
481{
482	int error = 0;
483
484	switch (cmd) {
485	case MODULE_CMD_INIT:
486		x86_pte_levels_init();
487		error = x86_pte_sysctl_init();
488		break;
489	case MODULE_CMD_FINI:
490		x86_pte_sysctl_destroy();
491		x86_pte_levels_destroy();
492		break;
493	default:
494		error = ENOTTY;
495		break;
496	}
497
498	return error;
499}
500