1// SPDX-License-Identifier: GPL-2.0
2/*  Copyright(c) 2016-20 Intel Corporation. */
3
4#include <asm/mman.h>
5#include <asm/sgx.h>
6#include <linux/mman.h>
7#include <linux/delay.h>
8#include <linux/file.h>
9#include <linux/hashtable.h>
10#include <linux/highmem.h>
11#include <linux/ratelimit.h>
12#include <linux/sched/signal.h>
13#include <linux/shmem_fs.h>
14#include <linux/slab.h>
15#include <linux/suspend.h>
16#include "driver.h"
17#include "encl.h"
18#include "encls.h"
19
20struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl, bool reclaim)
21{
22	struct sgx_va_page *va_page = NULL;
23	void *err;
24
25	BUILD_BUG_ON(SGX_VA_SLOT_COUNT !=
26		(SGX_ENCL_PAGE_VA_OFFSET_MASK >> 3) + 1);
27
28	if (!(encl->page_cnt % SGX_VA_SLOT_COUNT)) {
29		va_page = kzalloc(sizeof(*va_page), GFP_KERNEL);
30		if (!va_page)
31			return ERR_PTR(-ENOMEM);
32
33		va_page->epc_page = sgx_alloc_va_page(reclaim);
34		if (IS_ERR(va_page->epc_page)) {
35			err = ERR_CAST(va_page->epc_page);
36			kfree(va_page);
37			return err;
38		}
39
40		WARN_ON_ONCE(encl->page_cnt % SGX_VA_SLOT_COUNT);
41	}
42	encl->page_cnt++;
43	return va_page;
44}
45
46void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
47{
48	encl->page_cnt--;
49
50	if (va_page) {
51		sgx_encl_free_epc_page(va_page->epc_page);
52		list_del(&va_page->list);
53		kfree(va_page);
54	}
55}
56
57static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
58{
59	struct sgx_epc_page *secs_epc;
60	struct sgx_va_page *va_page;
61	struct sgx_pageinfo pginfo;
62	struct sgx_secinfo secinfo;
63	unsigned long encl_size;
64	struct file *backing;
65	long ret;
66
67	va_page = sgx_encl_grow(encl, true);
68	if (IS_ERR(va_page))
69		return PTR_ERR(va_page);
70	else if (va_page)
71		list_add(&va_page->list, &encl->va_pages);
72	/* else the tail page of the VA page list had free slots. */
73
74	/* The extra page goes to SECS. */
75	encl_size = secs->size + PAGE_SIZE;
76
77	backing = shmem_file_setup("SGX backing", encl_size + (encl_size >> 5),
78				   VM_NORESERVE);
79	if (IS_ERR(backing)) {
80		ret = PTR_ERR(backing);
81		goto err_out_shrink;
82	}
83
84	encl->backing = backing;
85
86	secs_epc = sgx_alloc_epc_page(&encl->secs, true);
87	if (IS_ERR(secs_epc)) {
88		ret = PTR_ERR(secs_epc);
89		goto err_out_backing;
90	}
91
92	encl->secs.epc_page = secs_epc;
93
94	pginfo.addr = 0;
95	pginfo.contents = (unsigned long)secs;
96	pginfo.metadata = (unsigned long)&secinfo;
97	pginfo.secs = 0;
98	memset(&secinfo, 0, sizeof(secinfo));
99
100	ret = __ecreate((void *)&pginfo, sgx_get_epc_virt_addr(secs_epc));
101	if (ret) {
102		ret = -EIO;
103		goto err_out;
104	}
105
106	if (secs->attributes & SGX_ATTR_DEBUG)
107		set_bit(SGX_ENCL_DEBUG, &encl->flags);
108
109	encl->secs.encl = encl;
110	encl->secs.type = SGX_PAGE_TYPE_SECS;
111	encl->base = secs->base;
112	encl->size = secs->size;
113	encl->attributes = secs->attributes;
114	encl->attributes_mask = SGX_ATTR_UNPRIV_MASK;
115
116	/* Set only after completion, as encl->lock has not been taken. */
117	set_bit(SGX_ENCL_CREATED, &encl->flags);
118
119	return 0;
120
121err_out:
122	sgx_encl_free_epc_page(encl->secs.epc_page);
123	encl->secs.epc_page = NULL;
124
125err_out_backing:
126	fput(encl->backing);
127	encl->backing = NULL;
128
129err_out_shrink:
130	sgx_encl_shrink(encl, va_page);
131
132	return ret;
133}
134
135/**
136 * sgx_ioc_enclave_create() - handler for %SGX_IOC_ENCLAVE_CREATE
137 * @encl:	An enclave pointer.
138 * @arg:	The ioctl argument.
139 *
140 * Allocate kernel data structures for the enclave and invoke ECREATE.
141 *
142 * Return:
143 * - 0:		Success.
144 * - -EIO:	ECREATE failed.
145 * - -errno:	POSIX error.
146 */
147static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg)
148{
149	struct sgx_enclave_create create_arg;
150	void *secs;
151	int ret;
152
153	if (test_bit(SGX_ENCL_CREATED, &encl->flags))
154		return -EINVAL;
155
156	if (copy_from_user(&create_arg, arg, sizeof(create_arg)))
157		return -EFAULT;
158
159	secs = kmalloc(PAGE_SIZE, GFP_KERNEL);
160	if (!secs)
161		return -ENOMEM;
162
163	if (copy_from_user(secs, (void __user *)create_arg.src, PAGE_SIZE))
164		ret = -EFAULT;
165	else
166		ret = sgx_encl_create(encl, secs);
167
168	kfree(secs);
169	return ret;
170}
171
172static int sgx_validate_secinfo(struct sgx_secinfo *secinfo)
173{
174	u64 perm = secinfo->flags & SGX_SECINFO_PERMISSION_MASK;
175	u64 pt   = secinfo->flags & SGX_SECINFO_PAGE_TYPE_MASK;
176
177	if (pt != SGX_SECINFO_REG && pt != SGX_SECINFO_TCS)
178		return -EINVAL;
179
180	if ((perm & SGX_SECINFO_W) && !(perm & SGX_SECINFO_R))
181		return -EINVAL;
182
183	/*
184	 * CPU will silently overwrite the permissions as zero, which means
185	 * that we need to validate it ourselves.
186	 */
187	if (pt == SGX_SECINFO_TCS && perm)
188		return -EINVAL;
189
190	if (secinfo->flags & SGX_SECINFO_RESERVED_MASK)
191		return -EINVAL;
192
193	if (memchr_inv(secinfo->reserved, 0, sizeof(secinfo->reserved)))
194		return -EINVAL;
195
196	return 0;
197}
198
199static int __sgx_encl_add_page(struct sgx_encl *encl,
200			       struct sgx_encl_page *encl_page,
201			       struct sgx_epc_page *epc_page,
202			       struct sgx_secinfo *secinfo, unsigned long src)
203{
204	struct sgx_pageinfo pginfo;
205	struct vm_area_struct *vma;
206	struct page *src_page;
207	int ret;
208
209	/* Deny noexec. */
210	vma = find_vma(current->mm, src);
211	if (!vma)
212		return -EFAULT;
213
214	if (!(vma->vm_flags & VM_MAYEXEC))
215		return -EACCES;
216
217	ret = get_user_pages(src, 1, 0, &src_page);
218	if (ret < 1)
219		return -EFAULT;
220
221	pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
222	pginfo.addr = encl_page->desc & PAGE_MASK;
223	pginfo.metadata = (unsigned long)secinfo;
224	pginfo.contents = (unsigned long)kmap_local_page(src_page);
225
226	ret = __eadd(&pginfo, sgx_get_epc_virt_addr(epc_page));
227
228	kunmap_local((void *)pginfo.contents);
229	put_page(src_page);
230
231	return ret ? -EIO : 0;
232}
233
234/*
235 * If the caller requires measurement of the page as a proof for the content,
236 * use EEXTEND to add a measurement for 256 bytes of the page. Repeat this
237 * operation until the entire page is measured."
238 */
239static int __sgx_encl_extend(struct sgx_encl *encl,
240			     struct sgx_epc_page *epc_page)
241{
242	unsigned long offset;
243	int ret;
244
245	for (offset = 0; offset < PAGE_SIZE; offset += SGX_EEXTEND_BLOCK_SIZE) {
246		ret = __eextend(sgx_get_epc_virt_addr(encl->secs.epc_page),
247				sgx_get_epc_virt_addr(epc_page) + offset);
248		if (ret) {
249			if (encls_failed(ret))
250				ENCLS_WARN(ret, "EEXTEND");
251
252			return -EIO;
253		}
254	}
255
256	return 0;
257}
258
/*
 * Add a single page to @encl at @offset, taking its contents from the user
 * address @src and its type/permissions from @secinfo. When @flags has
 * SGX_PAGE_MEASURE set, the page is also measured with EEXTEND.
 *
 * The enclave page descriptor, the EPC page and (if needed) a VA page are
 * allocated before any lock is taken. mmap_read_lock() and encl->lock are
 * then held across the page_array insertion, EADD and EEXTEND.
 *
 * Return: 0 on success; otherwise the error of the failing allocation,
 * xa_insert(), __sgx_encl_add_page() or __sgx_encl_extend().
 */
static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
			     unsigned long offset, struct sgx_secinfo *secinfo,
			     unsigned long flags)
{
	struct sgx_encl_page *encl_page;
	struct sgx_epc_page *epc_page;
	struct sgx_va_page *va_page;
	int ret;

	encl_page = sgx_encl_page_alloc(encl, offset, secinfo->flags);
	if (IS_ERR(encl_page))
		return PTR_ERR(encl_page);

	epc_page = sgx_alloc_epc_page(encl_page, true);
	if (IS_ERR(epc_page)) {
		kfree(encl_page);
		return PTR_ERR(epc_page);
	}

	/* Accounts the page in encl->page_cnt; undone by sgx_encl_shrink(). */
	va_page = sgx_encl_grow(encl, true);
	if (IS_ERR(va_page)) {
		ret = PTR_ERR(va_page);
		goto err_out_free;
	}

	mmap_read_lock(current->mm);
	mutex_lock(&encl->lock);

	/*
	 * Adding to encl->va_pages must be done under encl->lock.  Ditto for
	 * deleting (via sgx_encl_shrink()) in the error path.
	 */
	if (va_page)
		list_add(&va_page->list, &encl->va_pages);

	/*
	 * Insert prior to EADD in case of OOM.  EADD modifies MRENCLAVE, i.e.
	 * can't be gracefully unwound, while failure on EADD/EXTEND is limited
	 * to userspace errors (or kernel/hardware bugs).
	 */
	ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
			encl_page, GFP_KERNEL);
	if (ret)
		goto err_out_unlock;

	ret = __sgx_encl_add_page(encl, encl_page, epc_page, secinfo,
				  src);
	if (ret)
		goto err_out;

	/*
	 * Complete the "add" before doing the "extend" so that the "add"
	 * isn't in a half-baked state in the extremely unlikely scenario
	 * the enclave will be destroyed in response to EEXTEND failure.
	 */
	encl_page->encl = encl;
	encl_page->epc_page = epc_page;
	/* The page type is stored in bits 8 and up of the SECINFO flags. */
	encl_page->type = (secinfo->flags & SGX_SECINFO_PAGE_TYPE_MASK) >> 8;
	encl->secs_child_cnt++;

	if (flags & SGX_PAGE_MEASURE) {
		ret = __sgx_encl_extend(encl, epc_page);
		if (ret)
			goto err_out;
	}

	sgx_mark_page_reclaimable(encl_page->epc_page);
	mutex_unlock(&encl->lock);
	mmap_read_unlock(current->mm);
	return ret;

	/* Error unwinding: each label undoes one more step than the next. */
err_out:
	xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));

err_out_unlock:
	sgx_encl_shrink(encl, va_page);
	mutex_unlock(&encl->lock);
	mmap_read_unlock(current->mm);

err_out_free:
	sgx_encl_free_epc_page(epc_page);
	kfree(encl_page);

	return ret;
}
344
345/*
346 * Ensure user provided offset and length values are valid for
347 * an enclave.
348 */
349static int sgx_validate_offset_length(struct sgx_encl *encl,
350				      unsigned long offset,
351				      unsigned long length)
352{
353	if (!IS_ALIGNED(offset, PAGE_SIZE))
354		return -EINVAL;
355
356	if (!length || !IS_ALIGNED(length, PAGE_SIZE))
357		return -EINVAL;
358
359	if (offset + length < offset)
360		return -EINVAL;
361
362	if (offset + length - PAGE_SIZE >= encl->size)
363		return -EINVAL;
364
365	return 0;
366}
367
368/**
369 * sgx_ioc_enclave_add_pages() - The handler for %SGX_IOC_ENCLAVE_ADD_PAGES
370 * @encl:       an enclave pointer
371 * @arg:	a user pointer to a struct sgx_enclave_add_pages instance
372 *
373 * Add one or more pages to an uninitialized enclave, and optionally extend the
374 * measurement with the contents of the page. The SECINFO and measurement mask
375 * are applied to all pages.
376 *
377 * A SECINFO for a TCS is required to always contain zero permissions because
378 * CPU silently zeros them. Allowing anything else would cause a mismatch in
379 * the measurement.
380 *
381 * mmap()'s protection bits are capped by the page permissions. For each page
382 * address, the maximum protection bits are computed with the following
383 * heuristics:
384 *
385 * 1. A regular page: PROT_R, PROT_W and PROT_X match the SECINFO permissions.
386 * 2. A TCS page: PROT_R | PROT_W.
387 *
388 * mmap() is not allowed to surpass the minimum of the maximum protection bits
389 * within the given address range.
390 *
391 * The function deinitializes kernel data structures for enclave and returns
392 * -EIO in any of the following conditions:
393 *
394 * - Enclave Page Cache (EPC), the physical memory holding enclaves, has
395 *   been invalidated. This will cause EADD and EEXTEND to fail.
396 * - If the source address is corrupted somehow when executing EADD.
397 *
398 * Return:
399 * - 0:		Success.
400 * - -EACCES:	The source page is located in a noexec partition.
401 * - -ENOMEM:	Out of EPC pages.
402 * - -EINTR:	The call was interrupted before data was processed.
403 * - -EIO:	Either EADD or EEXTEND failed because invalid source address
404 *		or power cycle.
405 * - -errno:	POSIX error.
406 */
407static long sgx_ioc_enclave_add_pages(struct sgx_encl *encl, void __user *arg)
408{
409	struct sgx_enclave_add_pages add_arg;
410	struct sgx_secinfo secinfo;
411	unsigned long c;
412	int ret;
413
414	if (!test_bit(SGX_ENCL_CREATED, &encl->flags) ||
415	    test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
416		return -EINVAL;
417
418	if (copy_from_user(&add_arg, arg, sizeof(add_arg)))
419		return -EFAULT;
420
421	if (!IS_ALIGNED(add_arg.src, PAGE_SIZE))
422		return -EINVAL;
423
424	if (sgx_validate_offset_length(encl, add_arg.offset, add_arg.length))
425		return -EINVAL;
426
427	if (copy_from_user(&secinfo, (void __user *)add_arg.secinfo,
428			   sizeof(secinfo)))
429		return -EFAULT;
430
431	if (sgx_validate_secinfo(&secinfo))
432		return -EINVAL;
433
434	for (c = 0 ; c < add_arg.length; c += PAGE_SIZE) {
435		if (signal_pending(current)) {
436			if (!c)
437				ret = -ERESTARTSYS;
438
439			break;
440		}
441
442		if (need_resched())
443			cond_resched();
444
445		ret = sgx_encl_add_page(encl, add_arg.src + c, add_arg.offset + c,
446					&secinfo, add_arg.flags);
447		if (ret)
448			break;
449	}
450
451	add_arg.count = c;
452
453	if (copy_to_user(arg, &add_arg, sizeof(add_arg)))
454		return -EFAULT;
455
456	return ret;
457}
458
459static int __sgx_get_key_hash(struct crypto_shash *tfm, const void *modulus,
460			      void *hash)
461{
462	SHASH_DESC_ON_STACK(shash, tfm);
463
464	shash->tfm = tfm;
465
466	return crypto_shash_digest(shash, modulus, SGX_MODULUS_SIZE, hash);
467}
468
469static int sgx_get_key_hash(const void *modulus, void *hash)
470{
471	struct crypto_shash *tfm;
472	int ret;
473
474	tfm = crypto_alloc_shash("sha256", 0, CRYPTO_ALG_ASYNC);
475	if (IS_ERR(tfm))
476		return PTR_ERR(tfm);
477
478	ret = __sgx_get_key_hash(tfm, modulus, hash);
479
480	crypto_free_shash(tfm);
481	return ret;
482}
483
/*
 * Initialize @encl by running ENCLS[EINIT] with @sigstruct and @token.
 *
 * The attribute/miscselect/xfrm checks and the hash of the signer's modulus
 * are done before encl->lock is taken. EINIT is then retried under the lock
 * for as long as it reports SGX_UNMASKED_EVENT, up to SGX_EINIT_SPIN_COUNT
 * back-to-back attempts per round and SGX_EINIT_SLEEP_COUNT rounds with a
 * sleep in between.
 *
 * Return: 0 on success (SGX_ENCL_INITIALIZED is set), -EACCES or -EINVAL if
 * a pre-check fails, the error from sgx_get_key_hash(), -ERESTARTSYS if a
 * signal is pending after a sleep, -EIO if EINIT faulted, or -EPERM for any
 * other non-zero EINIT result.
 */
static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct,
			 void *token)
{
	u64 mrsigner[4];
	int i, j;
	void *addr;
	int ret;

	/*
	 * Deny initializing enclaves with attributes (namely provisioning)
	 * that have not been explicitly allowed.
	 */
	if (encl->attributes & ~encl->attributes_mask)
		return -EACCES;

	/*
	 * Attributes should not be enforced *only* against what's available on
	 * platform (done in sgx_encl_create) but checked and enforced against
	 * the mask for enforcement in sigstruct. For example an enclave could
	 * opt to sign with AVX bit in xfrm, but still be loadable on a platform
	 * without it if the sigstruct->body.attributes_mask does not turn that
	 * bit on.
	 */
	if (sigstruct->body.attributes & sigstruct->body.attributes_mask &
	    sgx_attributes_reserved_mask)
		return -EINVAL;

	if (sigstruct->body.miscselect & sigstruct->body.misc_mask &
	    sgx_misc_reserved_mask)
		return -EINVAL;

	if (sigstruct->body.xfrm & sigstruct->body.xfrm_mask &
	    sgx_xfrm_reserved_mask)
		return -EINVAL;

	/* The hash of the signer's modulus is what EINIT is run against. */
	ret = sgx_get_key_hash(sigstruct->modulus, mrsigner);
	if (ret)
		return ret;

	mutex_lock(&encl->lock);

	/*
	 * ENCLS[EINIT] is interruptible because it has such a high latency,
	 * e.g. 50k+ cycles on success. If an IRQ/NMI/SMI becomes pending,
	 * EINIT may fail with SGX_UNMASKED_EVENT so that the event can be
	 * serviced.
	 */
	for (i = 0; i < SGX_EINIT_SLEEP_COUNT; i++) {
		for (j = 0; j < SGX_EINIT_SPIN_COUNT; j++) {
			addr = sgx_get_epc_virt_addr(encl->secs.epc_page);

			/*
			 * The hash update and EINIT run back to back with
			 * preemption disabled.
			 */
			preempt_disable();

			sgx_update_lepubkeyhash(mrsigner);

			ret = __einit(sigstruct, token, addr);

			preempt_enable();

			if (ret == SGX_UNMASKED_EVENT)
				continue;
			else
				break;
		}

		/* Anything but an unmasked event ends the retrying. */
		if (ret != SGX_UNMASKED_EVENT)
			break;

		msleep_interruptible(SGX_EINIT_SLEEP_TIME);

		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			goto err_out;
		}
	}

	/* Translate the raw EINIT result into an errno. */
	if (encls_faulted(ret)) {
		if (encls_failed(ret))
			ENCLS_WARN(ret, "EINIT");

		ret = -EIO;
	} else if (ret) {
		pr_debug("EINIT returned %d\n", ret);
		ret = -EPERM;
	} else {
		set_bit(SGX_ENCL_INITIALIZED, &encl->flags);
	}

err_out:
	mutex_unlock(&encl->lock);
	return ret;
}
576
577/**
578 * sgx_ioc_enclave_init() - handler for %SGX_IOC_ENCLAVE_INIT
579 * @encl:	an enclave pointer
580 * @arg:	userspace pointer to a struct sgx_enclave_init instance
581 *
582 * Flush any outstanding enqueued EADD operations and perform EINIT.  The
583 * Launch Enclave Public Key Hash MSRs are rewritten as necessary to match
584 * the enclave's MRSIGNER, which is calculated from the provided sigstruct.
585 *
586 * Return:
587 * - 0:		Success.
588 * - -EPERM:	Invalid SIGSTRUCT.
589 * - -EIO:	EINIT failed because of a power cycle.
590 * - -errno:	POSIX error.
591 */
592static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg)
593{
594	struct sgx_sigstruct *sigstruct;
595	struct sgx_enclave_init init_arg;
596	void *token;
597	int ret;
598
599	if (!test_bit(SGX_ENCL_CREATED, &encl->flags) ||
600	    test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
601		return -EINVAL;
602
603	if (copy_from_user(&init_arg, arg, sizeof(init_arg)))
604		return -EFAULT;
605
606	/*
607	 * 'sigstruct' must be on a page boundary and 'token' on a 512 byte
608	 * boundary.  kmalloc() will give this alignment when allocating
609	 * PAGE_SIZE bytes.
610	 */
611	sigstruct = kmalloc(PAGE_SIZE, GFP_KERNEL);
612	if (!sigstruct)
613		return -ENOMEM;
614
615	token = (void *)((unsigned long)sigstruct + PAGE_SIZE / 2);
616	memset(token, 0, SGX_LAUNCH_TOKEN_SIZE);
617
618	if (copy_from_user(sigstruct, (void __user *)init_arg.sigstruct,
619			   sizeof(*sigstruct))) {
620		ret = -EFAULT;
621		goto out;
622	}
623
624	/*
625	 * A legacy field used with Intel signed enclaves. These used to mean
626	 * regular and architectural enclaves. The CPU only accepts these values
627	 * but they do not have any other meaning.
628	 *
629	 * Thus, reject any other values.
630	 */
631	if (sigstruct->header.vendor != 0x0000 &&
632	    sigstruct->header.vendor != 0x8086) {
633		ret = -EINVAL;
634		goto out;
635	}
636
637	ret = sgx_encl_init(encl, sigstruct, token);
638
639out:
640	kfree(sigstruct);
641	return ret;
642}
643
644/**
645 * sgx_ioc_enclave_provision() - handler for %SGX_IOC_ENCLAVE_PROVISION
646 * @encl:	an enclave pointer
647 * @arg:	userspace pointer to a struct sgx_enclave_provision instance
648 *
649 * Allow ATTRIBUTE.PROVISION_KEY for an enclave by providing a file handle to
650 * /dev/sgx_provision.
651 *
652 * Return:
653 * - 0:		Success.
654 * - -errno:	Otherwise.
655 */
656static long sgx_ioc_enclave_provision(struct sgx_encl *encl, void __user *arg)
657{
658	struct sgx_enclave_provision params;
659
660	if (copy_from_user(&params, arg, sizeof(params)))
661		return -EFAULT;
662
663	return sgx_set_attribute(&encl->attributes_mask, params.fd);
664}
665
666/*
667 * Ensure enclave is ready for SGX2 functions. Readiness is checked
668 * by ensuring the hardware supports SGX2 and the enclave is initialized
669 * and thus able to handle requests to modify pages within it.
670 */
671static int sgx_ioc_sgx2_ready(struct sgx_encl *encl)
672{
673	if (!(cpu_feature_enabled(X86_FEATURE_SGX2)))
674		return -ENODEV;
675
676	if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
677		return -EINVAL;
678
679	return 0;
680}
681
682/*
683 * Some SGX functions require that no cached linear-to-physical address
684 * mappings are present before they can succeed. Collaborate with
685 * hardware via ENCLS[ETRACK] to ensure that all cached
686 * linear-to-physical address mappings belonging to all threads of
687 * the enclave are cleared. See sgx_encl_cpumask() for details.
688 *
689 * Must be called with enclave's mutex held from the time the
690 * SGX function requiring that no cached linear-to-physical mappings
691 * are present is executed until this ETRACK flow is complete.
692 */
693static int sgx_enclave_etrack(struct sgx_encl *encl)
694{
695	void *epc_virt;
696	int ret;
697
698	epc_virt = sgx_get_epc_virt_addr(encl->secs.epc_page);
699	ret = __etrack(epc_virt);
700	if (ret) {
701		/*
702		 * ETRACK only fails when there is an OS issue. For
703		 * example, two consecutive ETRACK was sent without
704		 * completed IPI between.
705		 */
706		pr_err_once("ETRACK returned %d (0x%x)", ret, ret);
707		/*
708		 * Send IPIs to kick CPUs out of the enclave and
709		 * try ETRACK again.
710		 */
711		on_each_cpu_mask(sgx_encl_cpumask(encl), sgx_ipi_cb, NULL, 1);
712		ret = __etrack(epc_virt);
713		if (ret) {
714			pr_err_once("ETRACK repeat returned %d (0x%x)",
715				    ret, ret);
716			return -EFAULT;
717		}
718	}
719	on_each_cpu_mask(sgx_encl_cpumask(encl), sgx_ipi_cb, NULL, 1);
720
721	return 0;
722}
723
724/**
725 * sgx_enclave_restrict_permissions() - Restrict EPCM permissions
726 * @encl:	Enclave to which the pages belong.
727 * @modp:	Checked parameters from user on which pages need modifying and
728 *              their new permissions.
729 *
730 * Return:
731 * - 0:		Success.
732 * - -errno:	Otherwise.
733 */
734static long
735sgx_enclave_restrict_permissions(struct sgx_encl *encl,
736				 struct sgx_enclave_restrict_permissions *modp)
737{
738	struct sgx_encl_page *entry;
739	struct sgx_secinfo secinfo;
740	unsigned long addr;
741	unsigned long c;
742	void *epc_virt;
743	int ret;
744
745	memset(&secinfo, 0, sizeof(secinfo));
746	secinfo.flags = modp->permissions & SGX_SECINFO_PERMISSION_MASK;
747
748	for (c = 0 ; c < modp->length; c += PAGE_SIZE) {
749		addr = encl->base + modp->offset + c;
750
751		sgx_reclaim_direct();
752
753		mutex_lock(&encl->lock);
754
755		entry = sgx_encl_load_page(encl, addr);
756		if (IS_ERR(entry)) {
757			ret = PTR_ERR(entry) == -EBUSY ? -EAGAIN : -EFAULT;
758			goto out_unlock;
759		}
760
761		/*
762		 * Changing EPCM permissions is only supported on regular
763		 * SGX pages. Attempting this change on other pages will
764		 * result in #PF.
765		 */
766		if (entry->type != SGX_PAGE_TYPE_REG) {
767			ret = -EINVAL;
768			goto out_unlock;
769		}
770
771		/*
772		 * Apart from ensuring that read-access remains, do not verify
773		 * the permission bits requested. Kernel has no control over
774		 * how EPCM permissions can be relaxed from within the enclave.
775		 * ENCLS[EMODPR] can only remove existing EPCM permissions,
776		 * attempting to set new permissions will be ignored by the
777		 * hardware.
778		 */
779
780		/* Change EPCM permissions. */
781		epc_virt = sgx_get_epc_virt_addr(entry->epc_page);
782		ret = __emodpr(&secinfo, epc_virt);
783		if (encls_faulted(ret)) {
784			/*
785			 * All possible faults should be avoidable:
786			 * parameters have been checked, will only change
787			 * permissions of a regular page, and no concurrent
788			 * SGX1/SGX2 ENCLS instructions since these
789			 * are protected with mutex.
790			 */
791			pr_err_once("EMODPR encountered exception %d\n",
792				    ENCLS_TRAPNR(ret));
793			ret = -EFAULT;
794			goto out_unlock;
795		}
796		if (encls_failed(ret)) {
797			modp->result = ret;
798			ret = -EFAULT;
799			goto out_unlock;
800		}
801
802		ret = sgx_enclave_etrack(encl);
803		if (ret) {
804			ret = -EFAULT;
805			goto out_unlock;
806		}
807
808		mutex_unlock(&encl->lock);
809	}
810
811	ret = 0;
812	goto out;
813
814out_unlock:
815	mutex_unlock(&encl->lock);
816out:
817	modp->count = c;
818
819	return ret;
820}
821
822/**
823 * sgx_ioc_enclave_restrict_permissions() - handler for
824 *                                        %SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS
825 * @encl:	an enclave pointer
826 * @arg:	userspace pointer to a &struct sgx_enclave_restrict_permissions
827 *		instance
828 *
829 * SGX2 distinguishes between relaxing and restricting the enclave page
830 * permissions maintained by the hardware (EPCM permissions) of pages
831 * belonging to an initialized enclave (after SGX_IOC_ENCLAVE_INIT).
832 *
833 * EPCM permissions cannot be restricted from within the enclave, the enclave
834 * requires the kernel to run the privileged level 0 instructions ENCLS[EMODPR]
835 * and ENCLS[ETRACK]. An attempt to relax EPCM permissions with this call
836 * will be ignored by the hardware.
837 *
838 * Return:
839 * - 0:		Success
840 * - -errno:	Otherwise
841 */
842static long sgx_ioc_enclave_restrict_permissions(struct sgx_encl *encl,
843						 void __user *arg)
844{
845	struct sgx_enclave_restrict_permissions params;
846	long ret;
847
848	ret = sgx_ioc_sgx2_ready(encl);
849	if (ret)
850		return ret;
851
852	if (copy_from_user(&params, arg, sizeof(params)))
853		return -EFAULT;
854
855	if (sgx_validate_offset_length(encl, params.offset, params.length))
856		return -EINVAL;
857
858	if (params.permissions & ~SGX_SECINFO_PERMISSION_MASK)
859		return -EINVAL;
860
861	/*
862	 * Fail early if invalid permissions requested to prevent ENCLS[EMODPR]
863	 * from faulting later when the CPU does the same check.
864	 */
865	if ((params.permissions & SGX_SECINFO_W) &&
866	    !(params.permissions & SGX_SECINFO_R))
867		return -EINVAL;
868
869	if (params.result || params.count)
870		return -EINVAL;
871
872	ret = sgx_enclave_restrict_permissions(encl, &params);
873
874	if (copy_to_user(arg, &params, sizeof(params)))
875		return -EFAULT;
876
877	return ret;
878}
879
/**
 * sgx_enclave_modify_types() - Modify type of SGX enclave pages
 * @encl:	Enclave to which the pages belong.
 * @modt:	Checked parameters from user about which pages need modifying
 *              and their new page type.
 *
 * The range is walked one page at a time. For each page encl->lock is
 * taken, the page is loaded, its current type is checked against the
 * requested type, ENCLS[EMODT] is executed and sgx_enclave_etrack() is run.
 * On return @modt->count holds the number of bytes processed, and
 * @modt->result holds the EMODT return value if the instruction reported a
 * failure.
 *
 * Return:
 * - 0:		Success
 * - -errno:	Otherwise
 */
static long sgx_enclave_modify_types(struct sgx_encl *encl,
				     struct sgx_enclave_modify_types *modt)
{
	unsigned long max_prot_restore;
	enum sgx_page_type page_type;
	struct sgx_encl_page *entry;
	struct sgx_secinfo secinfo;
	unsigned long prot;
	unsigned long addr;
	unsigned long c;
	void *epc_virt;
	int ret;

	page_type = modt->page_type & SGX_PAGE_TYPE_MASK;

	/*
	 * The only new page types allowed by hardware are PT_TCS and PT_TRIM.
	 */
	if (page_type != SGX_PAGE_TYPE_TCS && page_type != SGX_PAGE_TYPE_TRIM)
		return -EINVAL;

	memset(&secinfo, 0, sizeof(secinfo));

	/* The page type is placed in bits 8 and up of the SECINFO flags. */
	secinfo.flags = page_type << 8;

	for (c = 0 ; c < modt->length; c += PAGE_SIZE) {
		addr = encl->base + modt->offset + c;

		sgx_reclaim_direct();

		mutex_lock(&encl->lock);

		entry = sgx_encl_load_page(encl, addr);
		if (IS_ERR(entry)) {
			ret = PTR_ERR(entry) == -EBUSY ? -EAGAIN : -EFAULT;
			goto out_unlock;
		}

		/*
		 * Borrow the logic from the Intel SDM. Regular pages
		 * (SGX_PAGE_TYPE_REG) can change type to SGX_PAGE_TYPE_TCS
		 * or SGX_PAGE_TYPE_TRIM but TCS pages can only be trimmed.
		 * CET pages not supported yet.
		 */
		if (!(entry->type == SGX_PAGE_TYPE_REG ||
		      (entry->type == SGX_PAGE_TYPE_TCS &&
		       page_type == SGX_PAGE_TYPE_TRIM))) {
			ret = -EINVAL;
			goto out_unlock;
		}

		/* Saved so the error path can undo the change made below. */
		max_prot_restore = entry->vm_max_prot_bits;

		/*
		 * Once a regular page becomes a TCS page it cannot be
		 * changed back. So the maximum allowed protection reflects
		 * the TCS page that is always RW from kernel perspective but
		 * will be inaccessible from within enclave. Before doing
		 * so, do make sure that the new page type continues to
		 * respect the originally vetted page permissions.
		 */
		if (entry->type == SGX_PAGE_TYPE_REG &&
		    page_type == SGX_PAGE_TYPE_TCS) {
			/* The page must currently allow both read and write. */
			if (~entry->vm_max_prot_bits & (VM_READ | VM_WRITE)) {
				ret = -EPERM;
				goto out_unlock;
			}
			prot = PROT_READ | PROT_WRITE;
			entry->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);

			/*
			 * Prevent page from being reclaimed while mutex
			 * is released.
			 */
			if (sgx_unmark_page_reclaimable(entry->epc_page)) {
				ret = -EAGAIN;
				goto out_entry_changed;
			}

			/*
			 * Do not keep encl->lock because of dependency on
			 * mmap_lock acquired in sgx_zap_enclave_ptes().
			 */
			mutex_unlock(&encl->lock);

			sgx_zap_enclave_ptes(encl, addr);

			mutex_lock(&encl->lock);

			sgx_mark_page_reclaimable(entry->epc_page);
		}

		/* Change EPC type */
		epc_virt = sgx_get_epc_virt_addr(entry->epc_page);
		ret = __emodt(&secinfo, epc_virt);
		if (encls_faulted(ret)) {
			/*
			 * All possible faults should be avoidable:
			 * parameters have been checked, will only change
			 * valid page types, and no concurrent
			 * SGX1/SGX2 ENCLS instructions since these are
			 * protected with mutex.
			 */
			pr_err_once("EMODT encountered exception %d\n",
				    ENCLS_TRAPNR(ret));
			ret = -EFAULT;
			goto out_entry_changed;
		}
		if (encls_failed(ret)) {
			/* Hand the raw instruction status back to the user. */
			modt->result = ret;
			ret = -EFAULT;
			goto out_entry_changed;
		}

		/*
		 * NOTE: a failure here leaves through out_unlock, i.e.
		 * without restoring vm_max_prot_bits and without updating
		 * entry->type.
		 */
		ret = sgx_enclave_etrack(encl);
		if (ret) {
			ret = -EFAULT;
			goto out_unlock;
		}

		entry->type = page_type;

		mutex_unlock(&encl->lock);
	}

	ret = 0;
	goto out;

out_entry_changed:
	/* Put back the protection bits saved before the type change. */
	entry->vm_max_prot_bits = max_prot_restore;
out_unlock:
	mutex_unlock(&encl->lock);
out:
	/* Report how many bytes of the range were processed. */
	modt->count = c;

	return ret;
}
1027
1028/**
1029 * sgx_ioc_enclave_modify_types() - handler for %SGX_IOC_ENCLAVE_MODIFY_TYPES
1030 * @encl:	an enclave pointer
1031 * @arg:	userspace pointer to a &struct sgx_enclave_modify_types instance
1032 *
1033 * Ability to change the enclave page type supports the following use cases:
1034 *
1035 * * It is possible to add TCS pages to an enclave by changing the type of
1036 *   regular pages (%SGX_PAGE_TYPE_REG) to TCS (%SGX_PAGE_TYPE_TCS) pages.
1037 *   With this support the number of threads supported by an initialized
1038 *   enclave can be increased dynamically.
1039 *
1040 * * Regular or TCS pages can dynamically be removed from an initialized
1041 *   enclave by changing the page type to %SGX_PAGE_TYPE_TRIM. Changing the
1042 *   page type to %SGX_PAGE_TYPE_TRIM marks the page for removal with actual
1043 *   removal done by handler of %SGX_IOC_ENCLAVE_REMOVE_PAGES ioctl() called
1044 *   after ENCLU[EACCEPT] is run on %SGX_PAGE_TYPE_TRIM page from within the
1045 *   enclave.
1046 *
1047 * Return:
1048 * - 0:		Success
1049 * - -errno:	Otherwise
1050 */
1051static long sgx_ioc_enclave_modify_types(struct sgx_encl *encl,
1052					 void __user *arg)
1053{
1054	struct sgx_enclave_modify_types params;
1055	long ret;
1056
1057	ret = sgx_ioc_sgx2_ready(encl);
1058	if (ret)
1059		return ret;
1060
1061	if (copy_from_user(&params, arg, sizeof(params)))
1062		return -EFAULT;
1063
1064	if (sgx_validate_offset_length(encl, params.offset, params.length))
1065		return -EINVAL;
1066
1067	if (params.page_type & ~SGX_PAGE_TYPE_MASK)
1068		return -EINVAL;
1069
1070	if (params.result || params.count)
1071		return -EINVAL;
1072
1073	ret = sgx_enclave_modify_types(encl, &params);
1074
1075	if (copy_to_user(arg, &params, sizeof(params)))
1076		return -EFAULT;
1077
1078	return ret;
1079}
1080
/**
 * sgx_encl_remove_pages() - Remove trimmed pages from SGX enclave
 * @encl:	Enclave to which the pages belong
 * @params:	Checked parameters from user on which pages need to be removed
 *
 * The range is walked one page at a time. A page is removed only if its
 * type is %SGX_PAGE_TYPE_TRIM and the ENCLS[EMODPR] probe below page-faults.
 * Its PTEs are then zapped, its EPC page is freed, and its entry is erased
 * from encl->page_array and freed. On return @params->count holds the
 * number of bytes processed.
 *
 * Return:
 * - 0:		Success.
 * - -errno:	Otherwise.
 */
static long sgx_encl_remove_pages(struct sgx_encl *encl,
				  struct sgx_enclave_remove_pages *params)
{
	struct sgx_encl_page *entry;
	struct sgx_secinfo secinfo;
	unsigned long addr;
	unsigned long c;
	void *epc_virt;
	int ret;

	memset(&secinfo, 0, sizeof(secinfo));
	/* RWX is what the EMODPR probe below is run with. */
	secinfo.flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;

	for (c = 0 ; c < params->length; c += PAGE_SIZE) {
		addr = encl->base + params->offset + c;

		sgx_reclaim_direct();

		mutex_lock(&encl->lock);

		entry = sgx_encl_load_page(encl, addr);
		if (IS_ERR(entry)) {
			ret = PTR_ERR(entry) == -EBUSY ? -EAGAIN : -EFAULT;
			goto out_unlock;
		}

		/* Only pages previously marked as trimmed can be removed. */
		if (entry->type != SGX_PAGE_TYPE_TRIM) {
			ret = -EPERM;
			goto out_unlock;
		}

		/*
		 * ENCLS[EMODPR] is a no-op instruction used to inform if
		 * ENCLU[EACCEPT] was run from within the enclave. If
		 * ENCLS[EMODPR] is run with RWX on a trimmed page that is
		 * not yet accepted then it will return
		 * %SGX_PAGE_NOT_MODIFIABLE, after the trimmed page is
		 * accepted the instruction will encounter a page fault.
		 */
		epc_virt = sgx_get_epc_virt_addr(entry->epc_page);
		ret = __emodpr(&secinfo, epc_virt);
		/* Anything other than a page fault means "not accepted yet". */
		if (!encls_faulted(ret) || ENCLS_TRAPNR(ret) != X86_TRAP_PF) {
			ret = -EPERM;
			goto out_unlock;
		}

		/* A non-zero return here is reported as -EBUSY. */
		if (sgx_unmark_page_reclaimable(entry->epc_page)) {
			ret = -EBUSY;
			goto out_unlock;
		}

		/*
		 * Do not keep encl->lock because of dependency on
		 * mmap_lock acquired in sgx_zap_enclave_ptes().
		 */
		mutex_unlock(&encl->lock);

		sgx_zap_enclave_ptes(encl, addr);

		mutex_lock(&encl->lock);

		/* Tear down the page and all bookkeeping referring to it. */
		sgx_encl_free_epc_page(entry->epc_page);
		encl->secs_child_cnt--;
		entry->epc_page = NULL;
		xa_erase(&encl->page_array, PFN_DOWN(entry->desc));
		/* NULL: only the page count drops, no VA page is released. */
		sgx_encl_shrink(encl, NULL);
		kfree(entry);

		mutex_unlock(&encl->lock);
	}

	ret = 0;
	goto out;

out_unlock:
	mutex_unlock(&encl->lock);
out:
	/* Report how many bytes of the range were processed. */
	params->count = c;

	return ret;
}
1171
1172/**
1173 * sgx_ioc_enclave_remove_pages() - handler for %SGX_IOC_ENCLAVE_REMOVE_PAGES
1174 * @encl:	an enclave pointer
1175 * @arg:	userspace pointer to &struct sgx_enclave_remove_pages instance
1176 *
1177 * Final step of the flow removing pages from an initialized enclave. The
1178 * complete flow is:
1179 *
1180 * 1) User changes the type of the pages to be removed to %SGX_PAGE_TYPE_TRIM
1181 *    using the %SGX_IOC_ENCLAVE_MODIFY_TYPES ioctl().
1182 * 2) User approves the page removal by running ENCLU[EACCEPT] from within
1183 *    the enclave.
1184 * 3) User initiates actual page removal using the
1185 *    %SGX_IOC_ENCLAVE_REMOVE_PAGES ioctl() that is handled here.
1186 *
1187 * First remove any page table entries pointing to the page and then proceed
1188 * with the actual removal of the enclave page and data in support of it.
1189 *
1190 * VA pages are not affected by this removal. It is thus possible that the
1191 * enclave may end up with more VA pages than needed to support all its
1192 * pages.
1193 *
1194 * Return:
1195 * - 0:		Success
1196 * - -errno:	Otherwise
1197 */
1198static long sgx_ioc_enclave_remove_pages(struct sgx_encl *encl,
1199					 void __user *arg)
1200{
1201	struct sgx_enclave_remove_pages params;
1202	long ret;
1203
1204	ret = sgx_ioc_sgx2_ready(encl);
1205	if (ret)
1206		return ret;
1207
1208	if (copy_from_user(&params, arg, sizeof(params)))
1209		return -EFAULT;
1210
1211	if (sgx_validate_offset_length(encl, params.offset, params.length))
1212		return -EINVAL;
1213
1214	if (params.count)
1215		return -EINVAL;
1216
1217	ret = sgx_encl_remove_pages(encl, &params);
1218
1219	if (copy_to_user(arg, &params, sizeof(params)))
1220		return -EFAULT;
1221
1222	return ret;
1223}
1224
1225long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
1226{
1227	struct sgx_encl *encl = filep->private_data;
1228	int ret;
1229
1230	if (test_and_set_bit(SGX_ENCL_IOCTL, &encl->flags))
1231		return -EBUSY;
1232
1233	switch (cmd) {
1234	case SGX_IOC_ENCLAVE_CREATE:
1235		ret = sgx_ioc_enclave_create(encl, (void __user *)arg);
1236		break;
1237	case SGX_IOC_ENCLAVE_ADD_PAGES:
1238		ret = sgx_ioc_enclave_add_pages(encl, (void __user *)arg);
1239		break;
1240	case SGX_IOC_ENCLAVE_INIT:
1241		ret = sgx_ioc_enclave_init(encl, (void __user *)arg);
1242		break;
1243	case SGX_IOC_ENCLAVE_PROVISION:
1244		ret = sgx_ioc_enclave_provision(encl, (void __user *)arg);
1245		break;
1246	case SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS:
1247		ret = sgx_ioc_enclave_restrict_permissions(encl,
1248							   (void __user *)arg);
1249		break;
1250	case SGX_IOC_ENCLAVE_MODIFY_TYPES:
1251		ret = sgx_ioc_enclave_modify_types(encl, (void __user *)arg);
1252		break;
1253	case SGX_IOC_ENCLAVE_REMOVE_PAGES:
1254		ret = sgx_ioc_enclave_remove_pages(encl, (void __user *)arg);
1255		break;
1256	default:
1257		ret = -ENOIOCTLCMD;
1258		break;
1259	}
1260
1261	clear_bit(SGX_ENCL_IOCTL, &encl->flags);
1262	return ret;
1263}
1264