1/*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD$
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD$");
31
32#include <sys/param.h>
33#include <sys/kernel.h>
34#include <sys/types.h>
35#include <sys/systm.h>
36#include <sys/smp.h>
37#include <sys/sysctl.h>
38
39#include <vm/vm.h>
40#include <vm/pmap.h>
41#include <vm/vm_extern.h>
42
43#include <machine/vmm.h>
44
45#include "vmx_cpufunc.h"
46#include "ept.h"
47
/*
 * Helpers for decoding the value read from MSR_VMX_EPT_VPID_CAP.
 *
 * EPT_CAP_BIT() yields the (non-zero) bit itself when bit 'bit' is set in
 * 'cap' and 0 otherwise.  EPT_CAP_ALL_SET() is true only when every bit of
 * 'mask' is set in 'cap'.
 */
#define	EPT_CAP_BIT(cap, bit)		((cap) & (1UL << (bit)))
#define	EPT_CAP_ALL_SET(cap, mask)	(((cap) & (mask)) == (mask))

/* Single-bit capabilities. */
#define	EPT_SUPPORTS_EXEC_ONLY(cap)	EPT_CAP_BIT(cap, 0)
#define	EPT_PWL4(cap)			EPT_CAP_BIT(cap, 6)
#define	EPT_MEMORY_TYPE_WB(cap)		EPT_CAP_BIT(cap, 14)
#define	EPT_PDE_SUPERPAGE(cap)		EPT_CAP_BIT(cap, 16)	/* 2MB pages */
#define	EPT_PDPTE_SUPERPAGE(cap)	EPT_CAP_BIT(cap, 17)	/* 1GB pages */
#define	INVEPT_SUPPORTED(cap)		EPT_CAP_BIT(cap, 20)
#define	AD_BITS_SUPPORTED(cap)		EPT_CAP_BIT(cap, 21)
#define	INVVPID_SUPPORTED(cap)		EPT_CAP_BIT(cap, 32)

/* Bits 40-43: every one of them must be set for "all invvpid types". */
#define	INVVPID_ALL_TYPES_MASK		(0xFUL << 40)
#define	INVVPID_ALL_TYPES_SUPPORTED(cap)	\
	EPT_CAP_ALL_SET(cap, INVVPID_ALL_TYPES_MASK)

/* Bits 25-26: both must be set for "all invept types". */
#define	INVEPT_ALL_TYPES_MASK		(0x6UL << 24)
#define	INVEPT_ALL_TYPES_SUPPORTED(cap)		\
	EPT_CAP_ALL_SET(cap, INVEPT_ALL_TYPES_MASK)

/* Values used when composing the EPT pointer in eptp(). */
#define	EPT_PWLEVELS		4		/* page walk levels */
#define	EPT_ENABLE_AD_BITS	(1 << 6)
67
68SYSCTL_DECL(_hw_vmm);
69SYSCTL_NODE(_hw_vmm, OID_AUTO, ept, CTLFLAG_RW, NULL, NULL);
70
71static int ept_enable_ad_bits;
72
73static int ept_pmap_flags;
74SYSCTL_INT(_hw_vmm_ept, OID_AUTO, pmap_flags, CTLFLAG_RD,
75    &ept_pmap_flags, 0, NULL);
76
77int
78ept_init(int ipinum)
79{
80	int use_hw_ad_bits, use_superpages, use_exec_only;
81	uint64_t cap;
82
83	cap = rdmsr(MSR_VMX_EPT_VPID_CAP);
84
85	/*
86	 * Verify that:
87	 * - page walk length is 4 steps
88	 * - extended page tables can be laid out in write-back memory
89	 * - invvpid instruction with all possible types is supported
90	 * - invept instruction with all possible types is supported
91	 */
92	if (!EPT_PWL4(cap) ||
93	    !EPT_MEMORY_TYPE_WB(cap) ||
94	    !INVVPID_SUPPORTED(cap) ||
95	    !INVVPID_ALL_TYPES_SUPPORTED(cap) ||
96	    !INVEPT_SUPPORTED(cap) ||
97	    !INVEPT_ALL_TYPES_SUPPORTED(cap))
98		return (EINVAL);
99
100	ept_pmap_flags = ipinum & PMAP_NESTED_IPIMASK;
101
102	use_superpages = 1;
103	TUNABLE_INT_FETCH("hw.vmm.ept.use_superpages", &use_superpages);
104	if (use_superpages && EPT_PDE_SUPERPAGE(cap))
105		ept_pmap_flags |= PMAP_PDE_SUPERPAGE;	/* 2MB superpage */
106
107	use_hw_ad_bits = 1;
108	TUNABLE_INT_FETCH("hw.vmm.ept.use_hw_ad_bits", &use_hw_ad_bits);
109	if (use_hw_ad_bits && AD_BITS_SUPPORTED(cap))
110		ept_enable_ad_bits = 1;
111	else
112		ept_pmap_flags |= PMAP_EMULATE_AD_BITS;
113
114	use_exec_only = 1;
115	TUNABLE_INT_FETCH("hw.vmm.ept.use_exec_only", &use_exec_only);
116	if (use_exec_only && EPT_SUPPORTS_EXEC_ONLY(cap))
117		ept_pmap_flags |= PMAP_SUPPORTS_EXEC_ONLY;
118
119	return (0);
120}
121
#if 0
/*
 * Debugging aid, compiled out.
 *
 * Prints the page-table page at 'ptp' and recurses into the pages it
 * references, for at most 'nlevels' levels.  Each page is scanned as 512
 * 64-bit entries; entries equal to zero are not printed.  Output is
 * indented by one tab per level already descended (3 - remaining levels).
 */
static void
ept_dump(uint64_t *ptp, int nlevels)
{
	uint64_t entry, *child;
	int depth, idx, n;

	nlevels--;
	if (nlevels < 0)
		return;

	depth = 3 - nlevels;
	for (n = 0; n < depth; n++)
		printf("\t");
	printf("PTP = %p\n", ptp);

	for (idx = 0; idx < 512; idx++) {
		entry = ptp[idx];
		if (entry != 0) {
			for (n = 0; n < depth; n++)
				printf("\t");
			printf("%3d 0x%016lx\n", idx, entry);

			/* Last level, or a superpage entry: nothing below. */
			if (nlevels == 0 || (entry & EPT_PG_SUPERPAGE) != 0)
				continue;

			/* Reach the next-level page through the direct map. */
			child = (uint64_t *)PHYS_TO_DMAP(entry & EPT_ADDR_MASK);
			ept_dump(child, nlevels);
		}
	}
}
#endif
155
156static void
157invept_single_context(void *arg)
158{
159	struct invept_desc desc = *(struct invept_desc *)arg;
160
161	invept(INVEPT_TYPE_SINGLE_CONTEXT, desc);
162}
163
164void
165ept_invalidate_mappings(u_long eptp)
166{
167	struct invept_desc invept_desc = { 0 };
168
169	invept_desc.eptp = eptp;
170
171	smp_rendezvous(NULL, invept_single_context, NULL, &invept_desc);
172}
173
174static int
175ept_pinit(pmap_t pmap)
176{
177
178	return (pmap_pinit_type(pmap, PT_EPT, ept_pmap_flags));
179}
180
181struct vmspace *
182ept_vmspace_alloc(vm_offset_t min, vm_offset_t max)
183{
184
185	return (vmspace_alloc(min, max, ept_pinit));
186}
187
/*
 * Counterpart of ept_vmspace_alloc(): passes 'vmspace' straight to
 * vmspace_free().
 */
void
ept_vmspace_free(struct vmspace *vmspace)
{
	vmspace_free(vmspace);
}
194
195uint64_t
196eptp(uint64_t pml4)
197{
198	uint64_t eptp_val;
199
200	eptp_val = pml4 | (EPT_PWLEVELS - 1) << 3 | PAT_WRITE_BACK;
201	if (ept_enable_ad_bits)
202		eptp_val |= EPT_ENABLE_AD_BITS;
203
204	return (eptp_val);
205}
206