/*-
 * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com>
 * All rights reserved.
 *
 * This software was developed by BAE Systems, the University of Cambridge
 * Computer Laboratory, and Memorial University under DARPA/AFRL contract
 * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
 * (TC) research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Design overview.
 *
 * The driver provides a character device for the mmap(2) and ioctl(2) system
 * calls, allowing a user to manage isolated compartments ("enclaves") in the
 * user VA space.
 *
 * The driver's duties are EPC page management, enclave management and user
 * data validation.
 *
 * This driver requires Intel SGX support from the hardware.
 *
 * /dev/sgx:
 *    .mmap:
 *        sgx_mmap_single() allocates a VM object with the following pager
 *        operations:
 *              a) sgx_pg_ctor():
 *                  VM object constructor does nothing
 *              b) sgx_pg_dtor():
 *                  VM object destructor destroys the SGX enclave associated
 *                  with the object: it frees all the EPC pages allocated for
 *                  the enclave and removes the enclave.
 *              c) sgx_pg_fault():
 *                  VM object fault handler does nothing
 *
 *    .ioctl:
 *        sgx_ioctl():
 *               a) SGX_IOC_ENCLAVE_CREATE
 *                   Adds the enclave SECS page: initial step of enclave
 *                   creation.
 *               b) SGX_IOC_ENCLAVE_ADD_PAGE
 *                   Adds TCS, REG pages to the enclave.
 *               c) SGX_IOC_ENCLAVE_INIT
 *                   Finalizes enclave creation.
 *
 * Enclave lifecycle:
 *          .-- ECREATE  -- Add SECS page
 *   Kernel |   EADD     -- Add TCS, REG pages
 *    space |   EEXTEND  -- Measure the page (take unique hash)
 *    ENCLS |   EPA      -- Allocate version array page
 *          '-- EINIT    -- Finalize enclave creation
 *   User   .-- EENTER   -- Go to entry point of enclave
 *    space |   EEXIT    -- Exit back to main application
 *    ENCLU '-- ERESUME  -- Resume enclave execution (e.g. after exception)
 *
 * Enclave lifecycle from the driver's point of view:
 *  1) The user calls mmap() on /dev/sgx: we allocate a VM object.
 *  2) The user calls ioctl SGX_IOC_ENCLAVE_CREATE: we look for the VM
 *     object associated with the user process created in step 1, create
 *     a SECS physical page and store it in the enclave's VM object queue
 *     at the special index SGX_SECS_VM_OBJECT_INDEX.
 *  3) The user calls ioctl SGX_IOC_ENCLAVE_ADD_PAGE: we look for the
 *     enclave created in step 2, create a TCS or REG physical page and map
 *     it at the user-specified address of the enclave VM object.
 *  4) The user finalizes enclave creation with an ioctl
 *     SGX_IOC_ENCLAVE_INIT call.
 *  5) The user can freely enter and exit the enclave using the ENCLU
 *     instructions from userspace: the driver does nothing here.
 *  6) The user calls the munmap(2) system call (or the process holding the
 *     enclave dies): we destroy the enclave associated with the object.
 *
 * EPC page types and their indexes in the VM object queue:
 *   - PT_SECS index is special and equals SGX_SECS_VM_OBJECT_INDEX (-1);
 *   - PT_TCS and PT_REG indexes are specified by the user in the addr field
 *     of the ioctl request data and determined as follows:
 *       pidx = OFF_TO_IDX(addp->addr - vmh->base);
 *   - PT_VA index is special, created for PT_REG, PT_TCS and PT_SECS pages
 *     and determined by the formula:
 *       va_page_idx = - SGX_VA_PAGES_OFFS - (page_idx / SGX_VA_PAGE_SLOTS);
 *     A PT_VA page can hold versions of up to 512 pages, and the slot for
 *     each page in the PT_VA page is determined as follows:
 *       va_slot_idx = page_idx % SGX_VA_PAGE_SLOTS;
 *   - PT_TRIM is unused.
 *
 * Locking:
 *    The SGX ENCLS set of instructions has limitations on concurrency:
 *    some instructions can't be executed at the same time on different
 *    CPUs. We use the sc->mtx_encls lock around them to prevent concurrent
 *    execution. The sc->mtx lock is used to manage the list of created
 *    enclaves and the state of the SGX driver.
 *
 * Eviction of EPC pages:
 *    Eviction support is not implemented in this driver; however, the
 *    driver manages VA (version array) pages: it allocates a VA slot for
 *    each EPC page. This will be required for eviction support in the
 *    future. VA pages and slots are currently unused.
 *
 * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
 * https://software.intel.com/en-us/articles/intel-sdm
 */
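
/*
 * Example userspace usage, following the lifecycle above. This is a sketch,
 * not part of the driver: the open flags, mmap protections and the variables
 * secs, addp and initp are placeholders, and the SECS/SIGSTRUCT/EINITTOKEN
 * contents are normally produced by an SGX SDK. The ioctl request structures
 * are declared in machine/sgx.h; the device node is created as "isgx" by
 * sgx_load() below.
 *
 *	int fd = open("/dev/isgx", O_RDWR);
 *	void *base = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
 *	    MAP_SHARED, fd, 0);
 *	struct sgx_enclave_create param = { .src = (uint64_t)secs };
 *	ioctl(fd, SGX_IOC_ENCLAVE_CREATE, &param);
 *	// One SGX_IOC_ENCLAVE_ADD_PAGE per TCS/REG page:
 *	ioctl(fd, SGX_IOC_ENCLAVE_ADD_PAGE, &addp);
 *	ioctl(fd, SGX_IOC_ENCLAVE_INIT, &initp);
 *	// EENTER/EEXIT/ERESUME then run from userspace without the driver.
 */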

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/ioccom.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/conf.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/vmem.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#include <vm/vm_radix.h>
#include <vm/pmap.h>

#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <machine/cpufunc.h>
#include <machine/sgx.h>
#include <machine/sgxreg.h>

#include <amd64/sgx/sgxvar.h>

#define	SGX_DEBUG
#undef	SGX_DEBUG

#ifdef	SGX_DEBUG
#define	dprintf(fmt, ...)	printf(fmt, ##__VA_ARGS__)
#else
#define	dprintf(fmt, ...)
#endif

static struct cdev_pager_ops sgx_pg_ops;
struct sgx_softc sgx_sc;

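/*
 * EPC page allocator: the EPC physical range is managed by a vmem(9) arena
 * created in sgx_get_epc_area(), so getting one EPC page is a PAGE_SIZE
 * vmem_alloc() translated back to an entry of the epc_pages[] array.
 */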
static int
sgx_get_epc_page(struct sgx_softc *sc, struct epc_page **epc)
{
	vmem_addr_t addr;
	int i;

	if (vmem_alloc(sc->vmem_epc, PAGE_SIZE, M_FIRSTFIT | M_NOWAIT,
	    &addr) == 0) {
		i = (addr - sc->epc_base) / PAGE_SIZE;
		*epc = &sc->epc_pages[i];
		return (0);
	}

	return (ENOMEM);
}

static void
sgx_put_epc_page(struct sgx_softc *sc, struct epc_page *epc)
{
	vmem_addr_t addr;

	if (epc == NULL)
		return;

	addr = (epc->index * PAGE_SIZE) + sc->epc_base;
	vmem_free(sc->vmem_epc, addr, PAGE_SIZE);
}

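/*
 * Allocate a version array (VA) page at the given special (negative) VM
 * object index, unless it already exists. The EPA instruction converts a
 * free EPC page into a PT_VA page; the ENCLS call is serialized by
 * sc->mtx_encls.
 */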
static int
sgx_va_slot_init_by_index(struct sgx_softc *sc, vm_object_t object,
    uint64_t idx)
{
	struct epc_page *epc;
	vm_page_t page;
	vm_page_t p;
	int ret;

	VM_OBJECT_ASSERT_WLOCKED(object);

	p = vm_page_lookup(object, idx);
	if (p == NULL) {
		ret = sgx_get_epc_page(sc, &epc);
		if (ret) {
			dprintf("%s: No free EPC pages available.\n",
			    __func__);
			return (ret);
		}

		mtx_lock(&sc->mtx_encls);
		sgx_epa((void *)epc->base);
		mtx_unlock(&sc->mtx_encls);

		page = PHYS_TO_VM_PAGE(epc->phys);

		page->valid = VM_PAGE_BITS_ALL;
		vm_page_insert(page, object, idx);
	}

	return (0);
}

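/*
 * Ensure the VA page covering the enclave page at 'addr' exists, using the
 * formulas from the design overview. For example (a sketch; any page-aligned
 * enclave offset works the same way): for addr 0x201000, pidx = 0x201, so
 * va_page_idx = 0x201 / SGX_VA_PAGE_SLOTS = 1 and the VA page lives at
 * object index - SGX_VA_PAGES_OFFS - 1.
 */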
static int
sgx_va_slot_init(struct sgx_softc *sc,
    struct sgx_enclave *enclave,
    uint64_t addr)
{
	vm_pindex_t pidx;
	uint64_t va_page_idx;
	uint64_t idx;
	vm_object_t object;
	int ret;

	object = enclave->object;

	VM_OBJECT_ASSERT_WLOCKED(object);

	pidx = OFF_TO_IDX(addr);

	va_page_idx = pidx / SGX_VA_PAGE_SLOTS;
	idx = - SGX_VA_PAGES_OFFS - va_page_idx;

	ret = sgx_va_slot_init_by_index(sc, object, idx);

	return (ret);
}

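/*
 * Resolve a user address to its backing VM map entry and object, and verify
 * the object really is an SGX one (a managed device pager using our pager
 * ops). On success the object is returned referenced; the caller must
 * vm_object_deallocate() it.
 */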
static int
sgx_mem_find(struct sgx_softc *sc, uint64_t addr,
    vm_map_entry_t *entry0, vm_object_t *object0)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_object_t object;

	map = &curproc->p_vmspace->vm_map;

	vm_map_lock_read(map);
	if (!vm_map_lookup_entry(map, addr, &entry)) {
		vm_map_unlock_read(map);
		dprintf("%s: Can't find enclave.\n", __func__);
		return (EINVAL);
	}

	object = entry->object.vm_object;
	if (object == NULL || object->handle == NULL) {
		vm_map_unlock_read(map);
		return (EINVAL);
	}

	if (object->type != OBJT_MGTDEVICE ||
	    object->un_pager.devp.ops != &sgx_pg_ops) {
		vm_map_unlock_read(map);
		return (EINVAL);
	}

	vm_object_reference(object);

	*object0 = object;
	*entry0 = entry;
	vm_map_unlock_read(map);

	return (0);
}

static int
sgx_enclave_find(struct sgx_softc *sc, uint64_t addr,
    struct sgx_enclave **encl)
{
	struct sgx_vm_handle *vmh;
	struct sgx_enclave *enclave;
	vm_map_entry_t entry;
	vm_object_t object;
	int ret;

	ret = sgx_mem_find(sc, addr, &entry, &object);
	if (ret)
		return (ret);

	vmh = object->handle;
	if (vmh == NULL) {
		vm_object_deallocate(object);
		return (EINVAL);
	}

	enclave = vmh->enclave;
	if (enclave == NULL || enclave->object == NULL) {
		vm_object_deallocate(object);
		return (EINVAL);
	}

	*encl = enclave;

	return (0);
}

static int
sgx_enclave_alloc(struct sgx_softc *sc, struct secs *secs,
    struct sgx_enclave **enclave0)
{
	struct sgx_enclave *enclave;

	enclave = malloc(sizeof(struct sgx_enclave),
	    M_SGX, M_WAITOK | M_ZERO);

	enclave->base = secs->base;
	enclave->size = secs->size;

	*enclave0 = enclave;

	return (0);
}

static void
sgx_epc_page_remove(struct sgx_softc *sc,
    struct epc_page *epc)
{

	mtx_lock(&sc->mtx_encls);
	sgx_eremove((void *)epc->base);
	mtx_unlock(&sc->mtx_encls);
}

static void
sgx_page_remove(struct sgx_softc *sc, vm_page_t p)
{
	struct epc_page *epc;
	vm_paddr_t pa;
	uint64_t offs;

	(void)vm_page_remove(p);

	dprintf("%s: p->pidx %ld\n", __func__, p->pindex);

	pa = VM_PAGE_TO_PHYS(p);
	epc = &sc->epc_pages[0];
	offs = (pa - epc->phys) / PAGE_SIZE;
	epc = &sc->epc_pages[offs];

	sgx_epc_page_remove(sc, epc);
	sgx_put_epc_page(sc, epc);
}

static void
sgx_enclave_remove(struct sgx_softc *sc,
    struct sgx_enclave *enclave)
{
	vm_object_t object;
	vm_page_t p, p_secs, p_next;

	mtx_lock(&sc->mtx);
	TAILQ_REMOVE(&sc->enclaves, enclave, next);
	mtx_unlock(&sc->mtx);

	object = enclave->object;

	VM_OBJECT_WLOCK(object);

	/*
	 * First remove all the pages except SECS,
	 * then remove the SECS page.
	 */
restart:
	TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) {
		if (p->pindex == SGX_SECS_VM_OBJECT_INDEX)
			continue;
		if (vm_page_busy_acquire(p, VM_ALLOC_WAITFAIL) == 0)
			goto restart;
		sgx_page_remove(sc, p);
	}
	p_secs = vm_page_grab(object, SGX_SECS_VM_OBJECT_INDEX,
	    VM_ALLOC_NOCREAT);
	/* Now remove the SECS page. */
	if (p_secs != NULL)
		sgx_page_remove(sc, p_secs);

	KASSERT(TAILQ_EMPTY(&object->memq) == 1, ("not empty"));
	KASSERT(object->resident_page_count == 0, ("count"));

	VM_OBJECT_WUNLOCK(object);
}

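/*
 * EEXTEND measures an EPC page in 256-byte chunks; mrmask selects which of
 * the 16 chunks of the page to include in the enclave measurement (bit i
 * covers bytes [i * 0x100, i * 0x100 + 0xff], so mrmask 0xffff measures the
 * whole page).
 */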
static int
sgx_measure_page(struct sgx_softc *sc, struct epc_page *secs,
    struct epc_page *epc, uint16_t mrmask)
{
	int i, j;
	int ret;

	mtx_lock(&sc->mtx_encls);

	for (i = 0, j = 1; i < PAGE_SIZE; i += 0x100, j <<= 1) {
		if (!(j & mrmask))
			continue;

		ret = sgx_eextend((void *)secs->base,
		    (void *)(epc->base + i));
		if (ret == SGX_EFAULT) {
			mtx_unlock(&sc->mtx_encls);
			return (ret);
		}
	}

	mtx_unlock(&sc->mtx_encls);

	return (0);
}

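/*
 * Sanity-check a user-supplied SECS against the architectural rules
 * (power-of-two size of at least two pages, base naturally aligned to the
 * size, reserved fields zero) before it is handed to ECREATE.
 */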
static int
sgx_secs_validate(struct sgx_softc *sc, struct secs *secs)
{
	struct secs_attr *attr;
	int i;

	if (secs->size == 0)
		return (EINVAL);

	/* BASEADDR must be naturally aligned on an SECS.SIZE boundary. */
	if (secs->base & (secs->size - 1))
		return (EINVAL);

	/* SECS.SIZE must be at least 2 pages. */
	if (secs->size < 2 * PAGE_SIZE)
		return (EINVAL);

	if ((secs->size & (secs->size - 1)) != 0)
		return (EINVAL);

	attr = &secs->attributes;

	if (attr->reserved1 != 0 ||
	    attr->reserved2 != 0 ||
	    attr->reserved3 != 0)
		return (EINVAL);

	for (i = 0; i < SECS_ATTR_RSV4_SIZE; i++)
		if (attr->reserved4[i])
			return (EINVAL);

	/*
	 * Intel(R) Software Guard Extensions Programming Reference
	 * 6.7.2 Relevant Fields in Various Data Structures
	 * 6.7.2.1 SECS.ATTRIBUTES.XFRM
	 * XFRM[1:0] must be set to 0x3.
	 */
	if ((attr->xfrm & 0x3) != 0x3)
		return (EINVAL);

	if (!attr->mode64bit)
		return (EINVAL);

	if (secs->size > sc->enclave_size_max)
		return (EINVAL);

	for (i = 0; i < SECS_RSV1_SIZE; i++)
		if (secs->reserved1[i])
			return (EINVAL);

	for (i = 0; i < SECS_RSV2_SIZE; i++)
		if (secs->reserved2[i])
			return (EINVAL);

	for (i = 0; i < SECS_RSV3_SIZE; i++)
		if (secs->reserved3[i])
			return (EINVAL);

	for (i = 0; i < SECS_RSV4_SIZE; i++)
		if (secs->reserved4[i])
			return (EINVAL);

	return (0);
}

static int
sgx_tcs_validate(struct tcs *tcs)
{
	int i;

	if ((tcs->flags) ||
	    (tcs->ossa & (PAGE_SIZE - 1)) ||
	    (tcs->ofsbasgx & (PAGE_SIZE - 1)) ||
	    (tcs->ogsbasgx & (PAGE_SIZE - 1)) ||
	    ((tcs->fslimit & 0xfff) != 0xfff) ||
	    ((tcs->gslimit & 0xfff) != 0xfff))
		return (EINVAL);

	for (i = 0; i < nitems(tcs->reserved3); i++)
		if (tcs->reserved3[i])
			return (EINVAL);

	return (0);
}

static void
sgx_tcs_dump(struct sgx_softc *sc, struct tcs *t)
{

	dprintf("t->flags %lx\n", t->flags);
	dprintf("t->ossa %lx\n", t->ossa);
	dprintf("t->cssa %x\n", t->cssa);
	dprintf("t->nssa %x\n", t->nssa);
	dprintf("t->oentry %lx\n", t->oentry);
	dprintf("t->ofsbasgx %lx\n", t->ofsbasgx);
	dprintf("t->ogsbasgx %lx\n", t->ogsbasgx);
	dprintf("t->fslimit %x\n", t->fslimit);
	dprintf("t->gslimit %x\n", t->gslimit);
}

static int
sgx_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred, u_short *color)
{
	struct sgx_vm_handle *vmh;

	vmh = handle;
	if (vmh == NULL) {
		dprintf("%s: vmh not found.\n", __func__);
		return (0);
	}

	dprintf("%s: vmh->base %lx foff 0x%lx size 0x%lx\n",
	    __func__, vmh->base, foff, size);

	return (0);
}

static void
sgx_pg_dtor(void *handle)
{
	struct sgx_vm_handle *vmh;
	struct sgx_softc *sc;

	vmh = handle;
	if (vmh == NULL) {
		dprintf("%s: vmh not found.\n", __func__);
		return;
	}

	sc = vmh->sc;
	if (sc == NULL) {
		dprintf("%s: sc is NULL\n", __func__);
		return;
	}

	if (vmh->enclave == NULL) {
		dprintf("%s: Enclave not found.\n", __func__);
		return;
	}

	sgx_enclave_remove(sc, vmh->enclave);

	free(vmh->enclave, M_SGX);
	free(vmh, M_SGX);
}

static int
sgx_pg_fault(vm_object_t object, vm_ooffset_t offset,
    int prot, vm_page_t *mres)
{

	/*
	 * The purpose of this trivial handler is to handle the race
	 * when a user tries to access the mmaped region before or
	 * during the enclave creation ioctl calls.
	 */

	dprintf("%s: offset 0x%lx\n", __func__, offset);

	return (VM_PAGER_FAIL);
}

static struct cdev_pager_ops sgx_pg_ops = {
	.cdev_pg_ctor = sgx_pg_ctor,
	.cdev_pg_dtor = sgx_pg_dtor,
	.cdev_pg_fault = sgx_pg_fault,
};

static void
sgx_insert_epc_page_by_index(vm_page_t page, vm_object_t object,
    vm_pindex_t pidx)
{

	VM_OBJECT_ASSERT_WLOCKED(object);

	page->valid = VM_PAGE_BITS_ALL;
	vm_page_insert(page, object, pidx);
}

static void
sgx_insert_epc_page(struct sgx_enclave *enclave,
    struct epc_page *epc, uint64_t addr)
{
	vm_pindex_t pidx;
	vm_page_t page;

	VM_OBJECT_ASSERT_WLOCKED(enclave->object);

	pidx = OFF_TO_IDX(addr);
	page = PHYS_TO_VM_PAGE(epc->phys);

	sgx_insert_epc_page_by_index(page, enclave->object, pidx);
}

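/*
 * SGX_IOC_ENCLAVE_CREATE: copy in and validate the SECS, locate the VM
 * object created by mmap(2), allocate an EPC page for the SECS plus its VA
 * page, and run ECREATE. On success the SECS page is inserted at the special
 * index SGX_SECS_VM_OBJECT_INDEX and the enclave is linked into
 * sc->enclaves.
 */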
static int
sgx_ioctl_create(struct sgx_softc *sc, struct sgx_enclave_create *param)
{
	struct sgx_vm_handle *vmh;
	vm_map_entry_t entry;
	vm_page_t p;
	struct page_info pginfo;
	struct secinfo secinfo;
	struct sgx_enclave *enclave;
	struct epc_page *epc;
	struct secs *secs;
	vm_object_t object;
	vm_page_t page;
	int ret;

	epc = NULL;
	secs = NULL;
	enclave = NULL;
	object = NULL;

	/* SGX Enclave Control Structure (SECS) */
	secs = malloc(PAGE_SIZE, M_SGX, M_WAITOK | M_ZERO);
	ret = copyin((void *)param->src, secs, sizeof(struct secs));
	if (ret) {
		dprintf("%s: Can't copy SECS.\n", __func__);
		goto error;
	}

	ret = sgx_secs_validate(sc, secs);
	if (ret) {
		dprintf("%s: SECS validation failed.\n", __func__);
		goto error;
	}

	ret = sgx_mem_find(sc, secs->base, &entry, &object);
	if (ret) {
		dprintf("%s: Can't find vm_map.\n", __func__);
		goto error;
	}

	vmh = object->handle;
	if (!vmh) {
		dprintf("%s: Can't find vmh.\n", __func__);
		ret = ENXIO;
		goto error;
	}

	dprintf("%s: entry start %lx offset %lx\n",
	    __func__, entry->start, entry->offset);
	vmh->base = (entry->start - entry->offset);

	ret = sgx_enclave_alloc(sc, secs, &enclave);
	if (ret) {
		dprintf("%s: Can't alloc enclave.\n", __func__);
		goto error;
	}
	enclave->object = object;
	enclave->vmh = vmh;

	memset(&secinfo, 0, sizeof(struct secinfo));
	memset(&pginfo, 0, sizeof(struct page_info));
	pginfo.linaddr = 0;
	pginfo.srcpge = (uint64_t)secs;
	pginfo.secinfo = &secinfo;
	pginfo.secs = 0;

	ret = sgx_get_epc_page(sc, &epc);
	if (ret) {
		dprintf("%s: Failed to get free epc page.\n", __func__);
		goto error;
	}
	enclave->secs_epc_page = epc;

	VM_OBJECT_WLOCK(object);
	p = vm_page_lookup(object, SGX_SECS_VM_OBJECT_INDEX);
	if (p) {
		VM_OBJECT_WUNLOCK(object);
		/* SECS page already added. */
		ret = ENXIO;
		goto error;
	}

	ret = sgx_va_slot_init_by_index(sc, object,
	    - SGX_VA_PAGES_OFFS - SGX_SECS_VM_OBJECT_INDEX);
	if (ret) {
		VM_OBJECT_WUNLOCK(object);
		dprintf("%s: Can't init va slot.\n", __func__);
		goto error;
	}

	mtx_lock(&sc->mtx);
	if ((sc->state & SGX_STATE_RUNNING) == 0) {
		mtx_unlock(&sc->mtx);
		/* Remove the VA page just created for the SECS page. */
		p = vm_page_grab(enclave->object,
		    - SGX_VA_PAGES_OFFS - SGX_SECS_VM_OBJECT_INDEX,
		    VM_ALLOC_NOCREAT);
		sgx_page_remove(sc, p);
		VM_OBJECT_WUNLOCK(object);
		ret = ENXIO;
		goto error;
	}
	mtx_lock(&sc->mtx_encls);
	ret = sgx_ecreate(&pginfo, (void *)epc->base);
	mtx_unlock(&sc->mtx_encls);
	if (ret == SGX_EFAULT) {
		dprintf("%s: gp fault\n", __func__);
		mtx_unlock(&sc->mtx);
		/* Remove the VA page just created for the SECS page. */
		p = vm_page_grab(enclave->object,
		    - SGX_VA_PAGES_OFFS - SGX_SECS_VM_OBJECT_INDEX,
		    VM_ALLOC_NOCREAT);
		sgx_page_remove(sc, p);
		VM_OBJECT_WUNLOCK(object);
		goto error;
	}

	TAILQ_INSERT_TAIL(&sc->enclaves, enclave, next);
	mtx_unlock(&sc->mtx);

	vmh->enclave = enclave;

	page = PHYS_TO_VM_PAGE(epc->phys);
	sgx_insert_epc_page_by_index(page, enclave->object,
	    SGX_SECS_VM_OBJECT_INDEX);

	VM_OBJECT_WUNLOCK(object);

	/* Release the reference. */
	vm_object_deallocate(object);

	free(secs, M_SGX);

	return (0);

error:
	free(secs, M_SGX);
	sgx_put_epc_page(sc, epc);
	free(enclave, M_SGX);
	vm_object_deallocate(object);

	return (ret);
}

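/*
 * SGX_IOC_ENCLAVE_ADD_PAGE: copy in the page contents and SECINFO, validate
 * TCS pages, make sure a VA slot exists, then EADD the page into the enclave
 * and EEXTEND (measure) the chunks selected by mrmask.
 */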
static int
sgx_ioctl_add_page(struct sgx_softc *sc,
    struct sgx_enclave_add_page *addp)
{
	struct epc_page *secs_epc_page;
	struct sgx_enclave *enclave;
	struct sgx_vm_handle *vmh;
	struct epc_page *epc;
	struct page_info pginfo;
	struct secinfo secinfo;
	vm_object_t object;
	void *tmp_vaddr;
	uint64_t page_type;
	struct tcs *t;
	uint64_t addr;
	uint64_t pidx;
	vm_page_t p;
	int ret;

	tmp_vaddr = NULL;
	epc = NULL;
	object = NULL;

	/* Find and get a reference to the VM object. */
	ret = sgx_enclave_find(sc, addp->addr, &enclave);
	if (ret) {
		dprintf("%s: Failed to find enclave.\n", __func__);
		goto error;
	}

	object = enclave->object;
	KASSERT(object != NULL, ("vm object is NULL\n"));
	vmh = object->handle;

	ret = sgx_get_epc_page(sc, &epc);
	if (ret) {
		dprintf("%s: Failed to get free epc page.\n", __func__);
		goto error;
	}

	memset(&secinfo, 0, sizeof(struct secinfo));
	ret = copyin((void *)addp->secinfo, &secinfo,
	    sizeof(struct secinfo));
	if (ret) {
		dprintf("%s: Failed to copy secinfo.\n", __func__);
		goto error;
	}

	tmp_vaddr = malloc(PAGE_SIZE, M_SGX, M_WAITOK | M_ZERO);
	ret = copyin((void *)addp->src, tmp_vaddr, PAGE_SIZE);
	if (ret) {
		dprintf("%s: Failed to copy page.\n", __func__);
		goto error;
	}

	page_type = (secinfo.flags & SECINFO_FLAGS_PT_M) >>
	    SECINFO_FLAGS_PT_S;
	if (page_type != SGX_PT_TCS && page_type != SGX_PT_REG) {
		dprintf("%s: page can't be added.\n", __func__);
		ret = EINVAL;
		goto error;
	}
	if (page_type == SGX_PT_TCS) {
		t = (struct tcs *)tmp_vaddr;
		ret = sgx_tcs_validate(t);
		if (ret) {
			dprintf("%s: TCS page validation failed.\n",
			    __func__);
			goto error;
		}
		sgx_tcs_dump(sc, t);
	}

	addr = (addp->addr - vmh->base);
	pidx = OFF_TO_IDX(addr);

	VM_OBJECT_WLOCK(object);
	p = vm_page_lookup(object, pidx);
	if (p) {
		VM_OBJECT_WUNLOCK(object);
		/* Page already added. */
		ret = ENXIO;
		goto error;
	}

	ret = sgx_va_slot_init(sc, enclave, addr);
	if (ret) {
		VM_OBJECT_WUNLOCK(object);
		dprintf("%s: Can't init va slot.\n", __func__);
		goto error;
	}

	secs_epc_page = enclave->secs_epc_page;
	memset(&pginfo, 0, sizeof(struct page_info));
	pginfo.linaddr = (uint64_t)addp->addr;
	pginfo.srcpge = (uint64_t)tmp_vaddr;
	pginfo.secinfo = &secinfo;
	pginfo.secs = (uint64_t)secs_epc_page->base;

	mtx_lock(&sc->mtx_encls);
	ret = sgx_eadd(&pginfo, (void *)epc->base);
	if (ret == SGX_EFAULT) {
		dprintf("%s: gp fault on eadd\n", __func__);
		mtx_unlock(&sc->mtx_encls);
		VM_OBJECT_WUNLOCK(object);
		goto error;
	}
	mtx_unlock(&sc->mtx_encls);

	ret = sgx_measure_page(sc, enclave->secs_epc_page, epc, addp->mrmask);
	if (ret == SGX_EFAULT) {
		dprintf("%s: gp fault on eextend\n", __func__);
		sgx_epc_page_remove(sc, epc);
		VM_OBJECT_WUNLOCK(object);
		goto error;
	}

	sgx_insert_epc_page(enclave, epc, addr);

	VM_OBJECT_WUNLOCK(object);

	/* Release the reference. */
	vm_object_deallocate(object);

	free(tmp_vaddr, M_SGX);

	return (0);

error:
	free(tmp_vaddr, M_SGX);
	sgx_put_epc_page(sc, epc);
	vm_object_deallocate(object);

	return (ret);
}

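/*
 * SGX_IOC_ENCLAVE_INIT: copy in the SIGSTRUCT and EINITTOKEN and run EINIT
 * to finalize the enclave. EINIT is retried a few times on
 * SGX_UNMASKED_EVENT; a hardware error code is reported to the user via
 * td_retval[0] rather than as an errno.
 */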
static int
sgx_ioctl_init(struct sgx_softc *sc, struct sgx_enclave_init *initp)
{
	struct epc_page *secs_epc_page;
	struct sgx_enclave *enclave;
	struct thread *td;
	void *tmp_vaddr;
	void *einittoken;
	void *sigstruct;
	vm_object_t object;
	int retry;
	int ret;

	td = curthread;
	tmp_vaddr = NULL;
	object = NULL;

	dprintf("%s: addr %lx, sigstruct %lx, einittoken %lx\n",
	    __func__, initp->addr, initp->sigstruct, initp->einittoken);

	/* Find and get a reference to the VM object. */
	ret = sgx_enclave_find(sc, initp->addr, &enclave);
	if (ret) {
		dprintf("%s: Failed to find enclave.\n", __func__);
		goto error;
	}

	object = enclave->object;

	tmp_vaddr = malloc(PAGE_SIZE, M_SGX, M_WAITOK | M_ZERO);
	sigstruct = tmp_vaddr;
	einittoken = (void *)((uint64_t)sigstruct + PAGE_SIZE / 2);

	ret = copyin((void *)initp->sigstruct, sigstruct,
	    SGX_SIGSTRUCT_SIZE);
	if (ret) {
		dprintf("%s: Failed to copy SIGSTRUCT page.\n", __func__);
		goto error;
	}

	ret = copyin((void *)initp->einittoken, einittoken,
	    SGX_EINITTOKEN_SIZE);
	if (ret) {
		dprintf("%s: Failed to copy EINITTOKEN page.\n", __func__);
		goto error;
	}

	secs_epc_page = enclave->secs_epc_page;
	retry = 16;
	do {
		mtx_lock(&sc->mtx_encls);
		ret = sgx_einit(sigstruct, (void *)secs_epc_page->base,
		    einittoken);
		mtx_unlock(&sc->mtx_encls);
		dprintf("%s: sgx_einit returned %d\n", __func__, ret);
	} while (ret == SGX_UNMASKED_EVENT && retry--);

	if (ret) {
		dprintf("%s: Failed to init enclave: %d\n", __func__, ret);
		td->td_retval[0] = ret;
		ret = 0;
	}

error:
	free(tmp_vaddr, M_SGX);

	/* Release the reference. */
	vm_object_deallocate(object);

	return (ret);
}

static int
sgx_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
    struct thread *td)
{
	struct sgx_enclave_add_page *addp;
	struct sgx_enclave_create *param;
	struct sgx_enclave_init *initp;
	struct sgx_softc *sc;
	int ret;
	int len;

	sc = &sgx_sc;

	len = IOCPARM_LEN(cmd);

	dprintf("%s: cmd %lx, addr %lx, len %d\n",
	    __func__, cmd, (uint64_t)addr, len);

	if (len > SGX_IOCTL_MAX_DATA_LEN)
		return (EINVAL);

	switch (cmd) {
	case SGX_IOC_ENCLAVE_CREATE:
		param = (struct sgx_enclave_create *)addr;
		ret = sgx_ioctl_create(sc, param);
		break;
	case SGX_IOC_ENCLAVE_ADD_PAGE:
		addp = (struct sgx_enclave_add_page *)addr;
		ret = sgx_ioctl_add_page(sc, addp);
		break;
	case SGX_IOC_ENCLAVE_INIT:
		initp = (struct sgx_enclave_init *)addr;
		ret = sgx_ioctl_init(sc, initp);
		break;
	default:
		return (EINVAL);
	}

	return (ret);
}

static int
sgx_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
    vm_size_t mapsize, struct vm_object **objp, int nprot)
{
	struct sgx_vm_handle *vmh;
	struct sgx_softc *sc;

	sc = &sgx_sc;

	dprintf("%s: mapsize 0x%lx, offset %lx\n",
	    __func__, mapsize, *offset);

	vmh = malloc(sizeof(struct sgx_vm_handle),
	    M_SGX, M_WAITOK | M_ZERO);
	vmh->sc = sc;
	vmh->size = mapsize;
	vmh->mem = cdev_pager_allocate(vmh, OBJT_MGTDEVICE, &sgx_pg_ops,
	    mapsize, nprot, *offset, NULL);
	if (vmh->mem == NULL) {
		free(vmh, M_SGX);
		return (ENOMEM);
	}

	VM_OBJECT_WLOCK(vmh->mem);
	vm_object_set_flag(vmh->mem, OBJ_PG_DTOR);
	VM_OBJECT_WUNLOCK(vmh->mem);

	*objp = vmh->mem;

	return (0);
}

static struct cdevsw sgx_cdevsw = {
	.d_version =		D_VERSION,
	.d_ioctl =		sgx_ioctl,
	.d_mmap_single =	sgx_mmap_single,
	.d_name =		"Intel SGX",
};

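/*
 * Discover the EPC region from CPUID leaf SGX_CPUID (0x12), subleaf 2:
 * EBX[19:0]:EAX[31:12] give the physical base and EDX[19:0]:ECX[31:12] the
 * size. For example (a sketch, not real hardware values), EAX = 0x70200001
 * and EBX = 0 would place the EPC base at physical address 0x70200000.
 */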
static int
sgx_get_epc_area(struct sgx_softc *sc)
{
	vm_offset_t epc_base_vaddr;
	u_int cp[4];
	int error;
	int i;

	cpuid_count(SGX_CPUID, 0x2, cp);

	sc->epc_base = ((uint64_t)(cp[1] & 0xfffff) << 32) +
	    (cp[0] & 0xfffff000);
	sc->epc_size = ((uint64_t)(cp[3] & 0xfffff) << 32) +
	    (cp[2] & 0xfffff000);
	sc->npages = sc->epc_size / SGX_PAGE_SIZE;

	if (sc->epc_size == 0 || sc->epc_base == 0) {
		printf("%s: Incorrect EPC data: EPC base %lx, size %lu\n",
		    __func__, sc->epc_base, sc->epc_size);
		return (EINVAL);
	}

	if (cp[3] & 0xffff)
		sc->enclave_size_max = (1 << ((cp[3] >> 8) & 0xff));
	else
		sc->enclave_size_max = SGX_ENCL_SIZE_MAX_DEF;

	epc_base_vaddr = (vm_offset_t)pmap_mapdev_attr(sc->epc_base,
	    sc->epc_size, VM_MEMATTR_DEFAULT);

	sc->epc_pages = malloc(sizeof(struct epc_page) * sc->npages,
	    M_SGX, M_WAITOK | M_ZERO);

	for (i = 0; i < sc->npages; i++) {
		sc->epc_pages[i].base = epc_base_vaddr + SGX_PAGE_SIZE * i;
		sc->epc_pages[i].phys = sc->epc_base + SGX_PAGE_SIZE * i;
		sc->epc_pages[i].index = i;
	}

	sc->vmem_epc = vmem_create("SGX EPC", sc->epc_base, sc->epc_size,
	    PAGE_SIZE, PAGE_SIZE, M_FIRSTFIT | M_WAITOK);
	if (sc->vmem_epc == NULL) {
		printf("%s: Can't create vmem arena.\n", __func__);
		free(sc->epc_pages, M_SGX);
		return (EINVAL);
	}

	error = vm_phys_fictitious_reg_range(sc->epc_base,
	    sc->epc_base + sc->epc_size, VM_MEMATTR_DEFAULT);
	if (error) {
		printf("%s: Can't register fictitious space.\n", __func__);
		free(sc->epc_pages, M_SGX);
		return (EINVAL);
	}

	return (0);
}

static void
sgx_put_epc_area(struct sgx_softc *sc)
{

	vm_phys_fictitious_unreg_range(sc->epc_base,
	    sc->epc_base + sc->epc_size);

	free(sc->epc_pages, M_SGX);
}

static int
sgx_load(void)
{
	struct sgx_softc *sc;
	int error;

	sc = &sgx_sc;

	if ((cpu_stdext_feature & CPUID_STDEXT_SGX) == 0)
		return (ENXIO);

	error = sgx_get_epc_area(sc);
	if (error) {
		printf("%s: Failed to get Processor Reserved Memory area.\n",
		    __func__);
		return (ENXIO);
	}

	mtx_init(&sc->mtx_encls, "SGX ENCLS", NULL, MTX_DEF);
	mtx_init(&sc->mtx, "SGX driver", NULL, MTX_DEF);

	TAILQ_INIT(&sc->enclaves);

	sc->sgx_cdev = make_dev(&sgx_cdevsw, 0, UID_ROOT, GID_WHEEL,
	    0600, "isgx");

	sc->state |= SGX_STATE_RUNNING;

	printf("SGX initialized: EPC base 0x%lx size %ld (%d pages)\n",
	    sc->epc_base, sc->epc_size, sc->npages);

	return (0);
}

static int
sgx_unload(void)
{
	struct sgx_softc *sc;

	sc = &sgx_sc;

	if ((sc->state & SGX_STATE_RUNNING) == 0)
		return (0);

	mtx_lock(&sc->mtx);
	if (!TAILQ_EMPTY(&sc->enclaves)) {
		mtx_unlock(&sc->mtx);
		return (EBUSY);
	}
	sc->state &= ~SGX_STATE_RUNNING;
	mtx_unlock(&sc->mtx);

	destroy_dev(sc->sgx_cdev);

	vmem_destroy(sc->vmem_epc);
	sgx_put_epc_area(sc);

	mtx_destroy(&sc->mtx_encls);
	mtx_destroy(&sc->mtx);

	return (0);
}

static int
sgx_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		error = sgx_load();
		break;
	case MOD_UNLOAD:
		error = sgx_unload();
		break;
	default:
		error = 0;
		break;
	}

	return (error);
}

static moduledata_t sgx_kmod = {
	"sgx",
	sgx_handler,
	NULL
};

DECLARE_MODULE(sgx, sgx_kmod, SI_SUB_LAST, SI_ORDER_ANY);
MODULE_VERSION(sgx, 1);