// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <limits.h>
#include <sys/syscall.h>
#include <time.h>
#include <signal.h>
#include <setjmp.h>
#include <sys/mman.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <inttypes.h>
#include <sched.h>

#include <sys/uio.h>
#include <linux/io_uring.h>
#include "../kselftest.h"

#ifndef __x86_64__
# error This test is 64-bit only
#endif
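
/*
 * Functional tests for Linear Address Masking (LAM_U57): dereferencing
 * tagged pointers from malloc()/mmap(), passing them to syscalls and
 * io_uring, LAM inheritance across fork/clone/execve, and the interaction
 * with PASID/SVA through a DSA shared work queue. See cmd_help() for how
 * to select individual test groups.
 */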

/* LAM modes; these definitions are copied from kernel code */
#define LAM_NONE                0
#define LAM_U57_BITS            6

#define LAM_U57_MASK            (0x3fULL << 57)
/* arch_prctl commands for LAM */
#define ARCH_GET_UNTAG_MASK     0x4001
#define ARCH_ENABLE_TAGGED_ADDR 0x4002
#define ARCH_GET_MAX_TAG_BITS   0x4003
#define ARCH_FORCE_TAGGED_SVA	0x4004

/* Test function selection bits (see the -t option) */
#define FUNC_MALLOC             0x1
#define FUNC_BITS               0x2
#define FUNC_MMAP               0x4
#define FUNC_SYSCALL            0x8
#define FUNC_URING              0x10
#define FUNC_INHERITE           0x20
#define FUNC_PASID              0x40

#define TEST_MASK               0x7f

#define LOW_ADDR                (0x1UL << 30)
#define HIGH_ADDR               (0x3UL << 48)

#define MALLOC_LEN              32

#define PAGE_SIZE               (4 << 10)

#define STACK_SIZE		65536

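/*
 * Compiler-only barrier; the test relies on it to order updates of the
 * io_uring ring head/tail indexes against accesses to the ring entries.
 */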
#define barrier() ({						\
		   __asm__ __volatile__("" : : : "memory");	\
})

#define URING_QUEUE_SZ 1
#define URING_BLOCK_SZ 2048

/* PASID test command bits */
#define LAM_CMD_BIT 0x1
#define PAS_CMD_BIT 0x2
#define SVA_CMD_BIT 0x4

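/*
 * Pack three test steps into one command word, one step per nibble,
 * e.g. PAS_CMD(LAM_CMD_BIT, PAS_CMD_BIT, SVA_CMD_BIT) == 0x421.
 * handle_pasid() executes one nibble per iteration (tmp >>= 4).
 */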
#define PAS_CMD(cmd1, cmd2, cmd3) (((cmd3) << 8) | ((cmd2) << 4) | ((cmd1) << 0))

struct testcases {
	unsigned int later;	/* Enable LAM only after the tested object has been created */
	int expected;		/* Expected return value: 2: SIGSEGV; 1: other failure; 0: success */
	unsigned long lam;
	uint64_t addr;
	uint64_t cmd;
	int (*test_func)(struct testcases *test);
	const char *msg;
};

/* Used by the io_uring CQ handler: source file descriptor, file size and iovecs */
struct file_io {
	int file_fd;
	off_t file_sz;
	struct iovec iovecs[];
};

struct io_uring_queue {
	unsigned int *head;
	unsigned int *tail;
	unsigned int *ring_mask;
	unsigned int *ring_entries;
	unsigned int *flags;
	unsigned int *array;
	union {
		struct io_uring_cqe *cqes;
		struct io_uring_sqe *sqes;
	} queue;
	size_t ring_sz;
};

struct io_ring {
	int ring_fd;
	struct io_uring_queue sq_ring;
	struct io_uring_queue cq_ring;
};

int tests_cnt;
jmp_buf segv_env;

static void segv_handler(int sig)
{
	ksft_print_msg("Got a segmentation fault (%d).\n", sig);

	siglongjmp(segv_env, 1);
}

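/* Check LAM feature in CPUID.(EAX=07H, ECX=01H):EAX.[bit 26] */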
static inline int cpu_has_lam(void)
{
	unsigned int cpuinfo[4];

	__cpuid_count(0x7, 1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]);

	return (cpuinfo[0] & (1 << 26));
}

/* Check 5-level page table feature in CPUID.(EAX=07H, ECX=00H):ECX.[bit 16] */
static inline int cpu_has_la57(void)
{
	unsigned int cpuinfo[4];

	__cpuid_count(0x7, 0, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]);

	return (cpuinfo[2] & (1 << 16));
}

/*
 * Request the given LAM mode via arch_prctl() and read back the untag
 * mask, then check that the mask matches the requested mode.
 *
 * @return:
 * 0: LAM mode was set successfully
 * others: failed to set LAM
 */
static int set_lam(unsigned long lam)
{
	int ret = 0;
	uint64_t ptr = 0;

	if (lam != LAM_U57_BITS && lam != LAM_NONE)
		return -1;

	/* Return value deliberately ignored; failure shows up in the mask check below */
	syscall(SYS_arch_prctl, ARCH_ENABLE_TAGGED_ADDR, lam);

	/* Get untag mask */
	syscall(SYS_arch_prctl, ARCH_GET_UNTAG_MASK, &ptr);

	/* Check that the returned mask is the expected one */
	if (lam == LAM_U57_BITS)
		ret = (ptr != ~(LAM_U57_MASK));
	else if (lam == LAM_NONE)
		ret = (ptr != -1ULL);

	return ret;
}

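/*
 * Probe whether LAM_U57 can be enabled. The probe runs in a short-lived
 * child so the parent's LAM state is left untouched; the child reports
 * the mode it managed to set via its exit status.
 */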
static unsigned long get_default_tag_bits(void)
{
	pid_t pid;
	int lam = LAM_NONE;
	int ret = 0;

	pid = fork();
	if (pid < 0) {
		perror("Fork failed.");
	} else if (pid == 0) {
		/* Set LAM mode in the child process */
		if (set_lam(LAM_U57_BITS) == 0)
			lam = LAM_U57_BITS;
		else
			lam = LAM_NONE;
		exit(lam);
	} else {
		wait(&ret);
		lam = WEXITSTATUS(ret);
	}

	return lam;
}

/*
 * Read back the untag mask and translate it to a LAM mode.
 * Returns LAM_U57_BITS, LAM_NONE, or -1 on error.
 */
static int get_lam(void)
{
	uint64_t ptr = 0;
	int ret = -1;

	/* Get untag mask */
	if (syscall(SYS_arch_prctl, ARCH_GET_UNTAG_MASK, &ptr) == -1)
		return -1;

	/* Check that the returned mask is the expected one */
	if (ptr == ~(LAM_U57_MASK))
		ret = LAM_U57_BITS;
	else if (ptr == -1ULL)
		ret = LAM_NONE;

	return ret;
}

/* According to the LAM mode, set metadata in the pointer's high bits */
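/*
 * e.g. with LAM_U57 a tag of 0x1 turns 0x00007f0012345678 into
 * 0x02007f0012345678: the tag lives in bits 62:57, bit 63 stays clear.
 */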
static uint64_t set_metadata(uint64_t src, unsigned long lam)
{
	uint64_t metadata;

	srand(time(NULL));

	switch (lam) {
	case LAM_U57_BITS: /* Set metadata in bits 62:57 */
		/* Get a random non-zero value as metadata */
		metadata = (rand() % ((1UL << LAM_U57_BITS) - 1) + 1) << 57;
		metadata |= (src & ~(LAM_U57_MASK));
		break;
	default:
		metadata = src;
		break;
	}

	return metadata;
}

/*
 * Set metadata in a user pointer and compare the new pointer with the
 * original one; both should reference the same memory.
 *
 * @return:
 * 0: the data read back through the tagged pointer matches the original
 * 1: the data does not match
 */
static int handle_lam_test(void *src, unsigned int lam)
{
	char *ptr;

	strcpy((char *)src, "USER POINTER");

	ptr = (char *)set_metadata((uint64_t)src, lam);
	if (src == ptr)
		return 0;

	/* Copy a string through the pointer with metadata */
	strcpy((char *)ptr, "METADATA POINTER");

	return (!!strcmp((char *)src, (char *)ptr));
}

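/*
 * Check that ARCH_GET_MAX_TAG_BITS reports the expected number of tag
 * bits: 6 when LAM_U57 is available, 0 otherwise.
 * Returns 0 on match, 1 on mismatch or prctl failure.
 */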
int handle_max_bits(struct testcases *test)
{
	unsigned long exp_bits = get_default_tag_bits();
	unsigned long bits = 0;

	if (exp_bits != LAM_NONE)
		exp_bits = LAM_U57_BITS;

	/* Get LAM max tag bits */
	if (syscall(SYS_arch_prctl, ARCH_GET_MAX_TAG_BITS, &bits) == -1)
		return 1;

	return (exp_bits != bits);
}

/*
 * Test the LAM feature by dereferencing a pointer obtained from malloc().
 * @return 0: test passed. 1: failure during the test. 2: got SIGSEGV.
 */
static int handle_malloc(struct testcases *test)
{
	char *ptr = NULL;
	int ret = 0;

	if (test->later == 0 && test->lam != 0)
		if (set_lam(test->lam) == -1)
			return 1;

	ptr = (char *)malloc(MALLOC_LEN);
	if (ptr == NULL) {
		perror("malloc() failure");
		return 1;
	}

	/* Set signal handler */
	if (sigsetjmp(segv_env, 1) == 0) {
		signal(SIGSEGV, segv_handler);
		ret = handle_lam_test(ptr, test->lam);
	} else {
		ret = 2;
	}

	if (test->later != 0 && test->lam != 0)
		if (set_lam(test->lam) == -1 && ret == 0)
			ret = 1;

	free(ptr);

	return ret;
}

static int handle_mmap(struct testcases *test)
{
	void *ptr;
	unsigned int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED;
	int ret = 0;

	if (test->later == 0 && test->lam != 0)
		if (set_lam(test->lam) != 0)
			return 1;

	ptr = mmap((void *)test->addr, PAGE_SIZE, PROT_READ | PROT_WRITE,
		   flags, -1, 0);
	if (ptr == MAP_FAILED) {
		if (test->addr == HIGH_ADDR)
			if (!cpu_has_la57())
				return 3; /* LA57 not supported */
		return 1;
	}

	if (test->later != 0 && test->lam != 0)
		if (set_lam(test->lam) != 0)
			ret = 1;

	if (ret == 0) {
		if (sigsetjmp(segv_env, 1) == 0) {
			signal(SIGSEGV, segv_handler);
			ret = handle_lam_test(ptr, test->lam);
		} else {
			ret = 2;
		}
	}

	munmap(ptr, PAGE_SIZE);
	return ret;
}

static int handle_syscall(struct testcases *test)
{
	struct utsname unme, *pu;
	int ret = 0;

	if (test->later == 0 && test->lam != 0)
		if (set_lam(test->lam) != 0)
			return 1;

	if (sigsetjmp(segv_env, 1) == 0) {
		signal(SIGSEGV, segv_handler);
		pu = (struct utsname *)set_metadata((uint64_t)&unme, test->lam);
		ret = uname(pu);
		if (ret < 0)
			ret = 1;
	} else {
		ret = 2;
	}

	if (test->later != 0 && test->lam != 0)
		if (set_lam(test->lam) != -1 && ret == 0)
			ret = 1;

	return ret;
}

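/*
 * Thin wrappers around the raw io_uring syscalls; the rings are driven
 * by hand below instead of depending on liburing.
 */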
int sys_uring_setup(unsigned int entries, struct io_uring_params *p)
{
	return (int)syscall(__NR_io_uring_setup, entries, p);
}

int sys_uring_enter(int fd, unsigned int to, unsigned int min, unsigned int flags)
{
	return (int)syscall(__NR_io_uring_enter, fd, to, min, flags, NULL, 0);
}

/* Init submission queue and completion queue */
int mmap_io_uring(struct io_uring_params p, struct io_ring *s)
{
	struct io_uring_queue *sring = &s->sq_ring;
	struct io_uring_queue *cring = &s->cq_ring;

	sring->ring_sz = p.sq_off.array + p.sq_entries * sizeof(unsigned int);
	cring->ring_sz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe);

	if (p.features & IORING_FEAT_SINGLE_MMAP) {
		if (cring->ring_sz > sring->ring_sz)
			sring->ring_sz = cring->ring_sz;

		cring->ring_sz = sring->ring_sz;
	}

	void *sq_ptr = mmap(0, sring->ring_sz, PROT_READ | PROT_WRITE,
			    MAP_SHARED | MAP_POPULATE, s->ring_fd,
			    IORING_OFF_SQ_RING);

	if (sq_ptr == MAP_FAILED) {
		perror("mmap submission queue");
		return 1;
	}

	void *cq_ptr = sq_ptr;

	if (!(p.features & IORING_FEAT_SINGLE_MMAP)) {
		cq_ptr = mmap(0, cring->ring_sz, PROT_READ | PROT_WRITE,
			      MAP_SHARED | MAP_POPULATE, s->ring_fd,
			      IORING_OFF_CQ_RING);
		if (cq_ptr == MAP_FAILED) {
			perror("mmap completion queue");
			munmap(sq_ptr, sring->ring_sz);
			return 1;
		}
	}

	sring->head = sq_ptr + p.sq_off.head;
	sring->tail = sq_ptr + p.sq_off.tail;
	sring->ring_mask = sq_ptr + p.sq_off.ring_mask;
	sring->ring_entries = sq_ptr + p.sq_off.ring_entries;
	sring->flags = sq_ptr + p.sq_off.flags;
	sring->array = sq_ptr + p.sq_off.array;

	/* Map the submission queue entries array */
	s->sq_ring.queue.sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe),
				     PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
				     s->ring_fd, IORING_OFF_SQES);
	if (s->sq_ring.queue.sqes == MAP_FAILED) {
		ksft_print_msg("failed to mmap uring queue!\n");
		munmap(sq_ptr, sring->ring_sz);
		if (sq_ptr != cq_ptr)
			munmap(cq_ptr, cring->ring_sz);
		return 1;
	}

	cring->head = cq_ptr + p.cq_off.head;
	cring->tail = cq_ptr + p.cq_off.tail;
	cring->ring_mask = cq_ptr + p.cq_off.ring_mask;
	cring->ring_entries = cq_ptr + p.cq_off.ring_entries;
	cring->queue.cqes = cq_ptr + p.cq_off.cqes;

	return 0;
}

/* Init the io_uring queues */
int setup_io_uring(struct io_ring *s)
{
	struct io_uring_params para;

	memset(&para, 0, sizeof(para));
	s->ring_fd = sys_uring_setup(URING_QUEUE_SZ, &para);
	if (s->ring_fd < 0)
		return 1;

	return mmap_io_uring(para, s);
}

/*
 * Reap the completion queue; the iovec buffers hold the file data.
 * Return 0 on success, others on error.
 */
int handle_uring_cq(struct io_ring *s)
{
	struct file_io *fi = NULL;
	struct io_uring_queue *cring = &s->cq_ring;
	struct io_uring_cqe *cqe;
	unsigned int head;
	off_t len = 0;

	head = *cring->head;

	do {
		barrier();
		if (head == *cring->tail)
			break;
		/* Get the entry */
		cqe = &cring->queue.cqes[head & *s->cq_ring.ring_mask];
		fi = (struct file_io *)cqe->user_data;
		if (cqe->res < 0)
			break;

		int blocks = (int)(fi->file_sz + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ;

		for (int i = 0; i < blocks; i++)
			len += fi->iovecs[i].iov_len;

		head++;
	} while (1);

	*cring->head = head;
	barrier();

	/* No completion was reaped */
	if (!fi)
		return 1;

	return (len != fi->file_sz);
}

/*
 * Fill and submit the submission queue: a single IORING_OP_READV request
 * whose iovec buffers have metadata set according to the LAM mode.
 */
int handle_uring_sq(struct io_ring *ring, struct file_io *fi, unsigned long lam)
{
	int file_fd = fi->file_fd;
	struct io_uring_queue *sring = &ring->sq_ring;
	unsigned int index = 0, cur_block = 0, tail = 0, next_tail = 0;
	struct io_uring_sqe *sqe;

	off_t remain = fi->file_sz;
	int blocks = (int)(remain + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ;

	while (remain) {
		off_t bytes = remain;
		void *buf;

		if (bytes > URING_BLOCK_SZ)
			bytes = URING_BLOCK_SZ;

		fi->iovecs[cur_block].iov_len = bytes;

		if (posix_memalign(&buf, URING_BLOCK_SZ, URING_BLOCK_SZ))
			return 1;

		fi->iovecs[cur_block].iov_base = (void *)set_metadata((uint64_t)buf, lam);
		remain -= bytes;
		cur_block++;
	}

	next_tail = *sring->tail;
	tail = next_tail;
	next_tail++;

	barrier();

	index = tail & *ring->sq_ring.ring_mask;

	sqe = &ring->sq_ring.queue.sqes[index];
	sqe->fd = file_fd;
	sqe->flags = 0;
	sqe->opcode = IORING_OP_READV;
	sqe->addr = (unsigned long)fi->iovecs;
	sqe->len = blocks;
	sqe->off = 0;
	sqe->user_data = (uint64_t)fi;

	sring->array[index] = index;
	tail = next_tail;

	if (*sring->tail != tail) {
		*sring->tail = tail;
		barrier();
	}

	if (sys_uring_enter(ring->ring_fd, 1, 1, IORING_ENTER_GETEVENTS) < 0)
		return 1;

	return 0;
}

/*
 * Test LAM with async I/O and io_uring: read the current binary through
 * io_uring with metadata set in the pointers to the iovec buffers.
 */
int do_uring(unsigned long lam)
{
	struct io_ring *ring;
	struct file_io *fi;
	struct stat st;
	int ret = 1;
	char path[PATH_MAX] = {0};

	/* Get the current process's path */
	if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0)
		return 1;

	int file_fd = open(path, O_RDONLY);

	if (file_fd < 0)
		return 1;

	if (fstat(file_fd, &st) < 0)
		return 1;

	off_t file_sz = st.st_size;

	int blocks = (int)(file_sz + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ;

	fi = malloc(sizeof(*fi) + sizeof(struct iovec) * blocks);
	if (!fi)
		return 1;

	/* Zero the iovecs so the cleanup path never frees uninitialized pointers */
	memset(fi, 0, sizeof(*fi) + sizeof(struct iovec) * blocks);

	fi->file_sz = file_sz;
	fi->file_fd = file_fd;

	ring = malloc(sizeof(*ring));
	if (!ring)
		return 1;

	memset(ring, 0, sizeof(struct io_ring));

	if (setup_io_uring(ring))
		goto out;

	if (handle_uring_sq(ring, fi, lam))
		goto out;

	ret = handle_uring_cq(ring);

out:
	free(ring);

	for (int i = 0; i < blocks; i++) {
		if (fi->iovecs[i].iov_base) {
			uint64_t addr = ((uint64_t)fi->iovecs[i].iov_base);

			switch (lam) {
			case LAM_U57_BITS: /* Clear bits 62:57 */
				addr = (addr & ~(LAM_U57_MASK));
				break;
			}
			free((void *)addr);
			fi->iovecs[i].iov_base = NULL;
		}
	}

	close(file_fd);
	free(fi);

	return ret;
}

int handle_uring(struct testcases *test)
{
	int ret = 0;

	if (test->later == 0 && test->lam != 0)
		if (set_lam(test->lam) != 0)
			return 1;

	if (sigsetjmp(segv_env, 1) == 0) {
		signal(SIGSEGV, segv_handler);
		ret = do_uring(test->lam);
	} else {
		ret = 2;
	}

	return ret;
}

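/*
 * Run one test case in a forked child so that LAM state enabled by one
 * case cannot leak into the cases that follow; the child's exit status
 * is the test result.
 */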
static int fork_test(struct testcases *test)
{
	int ret, child_ret;
	pid_t pid;

	pid = fork();
	if (pid < 0) {
		perror("Fork failed.");
		ret = 1;
	} else if (pid == 0) {
		ret = test->test_func(test);
		exit(ret);
	} else {
		wait(&child_ret);
		ret = WEXITSTATUS(child_ret);
	}

	return ret;
}

static int handle_execve(struct testcases *test)
{
	int ret, child_ret;
	int lam = test->lam;
	pid_t pid;

	pid = fork();
	if (pid < 0) {
		perror("Fork failed.");
		return 1;
	} else if (pid == 0) {
		char path[PATH_MAX] = {0};

		/* Enable LAM in the child before it execs itself */
		if (set_lam(lam) != 0)
			exit(1);

		/* Get the current binary's path; it is re-run via execve below */
		if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0)
			exit(-1);

		/* Re-run the binary, which reports its LAM mode via the exit status */
		if (execlp(path, path, "-t 0x0", NULL) < 0) {
			perror("error on exec");
			exit(-1);
		}
	} else {
		wait(&child_ret);
		ret = WEXITSTATUS(child_ret);
		if (ret != LAM_NONE)
			return 1;
	}

	return 0;
}

static int handle_inheritance(struct testcases *test)
{
	int ret, child_ret;
	int lam = test->lam;
	pid_t pid;

	/* Set LAM mode in the parent process */
	if (set_lam(lam) != 0)
		return 1;

	pid = fork();
	if (pid < 0) {
		perror("Fork failed.");
		return 1;
	} else if (pid == 0) {
		/* Read back the LAM mode in the child process */
		int child_lam = get_lam();

		exit(child_lam);
	} else {
		wait(&child_ret);
		ret = WEXITSTATUS(child_ret);

		if (lam != ret)
			return 1;
	}

	return 0;
}

static int thread_fn_get_lam(void *arg)
{
	return get_lam();
}

static int thread_fn_set_lam(void *arg)
{
	struct testcases *test = arg;

	return set_lam(test->lam);
}

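/*
 * Thread tests: a CLONE_VM child shares the address space, so it is
 * expected to observe the parent's LAM mode, while enabling LAM once a
 * second thread exists is expected to be rejected by the kernel (the
 * negative cases below rely on that).
 */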
static int handle_thread(struct testcases *test)
{
	char stack[STACK_SIZE];
	int ret, child_ret;
	int lam = 0;
	pid_t pid;

	/* Set LAM mode in the parent process */
	if (!test->later) {
		lam = test->lam;
		if (set_lam(lam) != 0)
			return 1;
	}

	pid = clone(thread_fn_get_lam, stack + STACK_SIZE,
		    SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM, NULL);
	if (pid < 0) {
		perror("Clone failed.");
		return 1;
	}

	waitpid(pid, &child_ret, 0);
	ret = WEXITSTATUS(child_ret);

	if (lam != ret)
		return 1;

	if (test->later) {
		if (set_lam(test->lam) != 0)
			return 1;
	}

	return 0;
}

static int handle_thread_enable(struct testcases *test)
{
	char stack[STACK_SIZE];
	int ret, child_ret;
	int lam = test->lam;
	pid_t pid;

	pid = clone(thread_fn_set_lam, stack + STACK_SIZE,
		    SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM, test);
	if (pid < 0) {
		perror("Clone failed.");
		return 1;
	}

	waitpid(pid, &child_ret, 0);
	ret = WEXITSTATUS(child_ret);

	if (lam != ret)
		return 1;

	return 0;
}

static void run_test(struct testcases *test, int count)
{
	int i, ret = 0;

	for (i = 0; i < count; i++) {
		struct testcases *t = test + i;

		/* Fork a process to run the test case */
		tests_cnt++;
		ret = fork_test(t);

		/* A return value of 3 means LA57 is not supported; skip the case */
		if (ret == 3) {
			ksft_test_result_skip("%s", t->msg);
			continue;
		}

		/* Pass when the result matches the expected value */
		if (ret != 0)
			ret = (t->expected == ret);
		else
			ret = !(t->expected);

		ksft_test_result(ret, "%s", t->msg);
	}
}

static struct testcases uring_cases[] = {
	{
		.later = 0,
		.lam = LAM_U57_BITS,
		.test_func = handle_uring,
		.msg = "URING: LAM_U57. Dereferencing pointer with metadata\n",
	},
	{
		.later = 1,
		.expected = 1,
		.lam = LAM_U57_BITS,
		.test_func = handle_uring,
		.msg = "URING:[Negative] Disable LAM. Dereferencing pointer with metadata.\n",
	},
};

static struct testcases malloc_cases[] = {
	{
		.later = 0,
		.lam = LAM_U57_BITS,
		.test_func = handle_malloc,
		.msg = "MALLOC: LAM_U57. Dereferencing pointer with metadata\n",
	},
	{
		.later = 1,
		.expected = 2,
		.lam = LAM_U57_BITS,
		.test_func = handle_malloc,
		.msg = "MALLOC:[Negative] Disable LAM. Dereferencing pointer with metadata.\n",
	},
};

static struct testcases bits_cases[] = {
	{
		.test_func = handle_max_bits,
		.msg = "BITS: Check default tag bits\n",
	},
};

static struct testcases syscall_cases[] = {
	{
		.later = 0,
		.lam = LAM_U57_BITS,
		.test_func = handle_syscall,
		.msg = "SYSCALL: LAM_U57. syscall with metadata\n",
	},
	{
		.later = 1,
		.expected = 1,
		.lam = LAM_U57_BITS,
		.test_func = handle_syscall,
		.msg = "SYSCALL:[Negative] Disable LAM. Dereferencing pointer with metadata.\n",
	},
};

static struct testcases mmap_cases[] = {
	{
		.later = 1,
		.expected = 0,
		.lam = LAM_U57_BITS,
		.addr = HIGH_ADDR,
		.test_func = handle_mmap,
		.msg = "MMAP: First mmap high address, then set LAM_U57.\n",
	},
	{
		.later = 0,
		.expected = 0,
		.lam = LAM_U57_BITS,
		.addr = HIGH_ADDR,
		.test_func = handle_mmap,
		.msg = "MMAP: First LAM_U57, then High address.\n",
	},
	{
		.later = 0,
		.expected = 0,
		.lam = LAM_U57_BITS,
		.addr = LOW_ADDR,
		.test_func = handle_mmap,
		.msg = "MMAP: First LAM_U57, then Low address.\n",
	},
};

static struct testcases inheritance_cases[] = {
	{
		.expected = 0,
		.lam = LAM_U57_BITS,
		.test_func = handle_inheritance,
		.msg = "FORK: LAM_U57, child process should get the same LAM mode as the parent\n",
	},
	{
		.expected = 0,
		.lam = LAM_U57_BITS,
		.test_func = handle_thread,
		.msg = "THREAD: LAM_U57, child thread should get the same LAM mode as the parent\n",
	},
	{
		.expected = 1,
		.lam = LAM_U57_BITS,
		.test_func = handle_thread_enable,
		.msg = "THREAD: [NEGATIVE] Enable LAM in child.\n",
	},
	{
		.expected = 1,
		.later = 1,
		.lam = LAM_U57_BITS,
		.test_func = handle_thread,
		.msg = "THREAD: [NEGATIVE] Enable LAM in parent after thread created.\n",
	},
	{
		.expected = 0,
		.lam = LAM_U57_BITS,
		.test_func = handle_execve,
		.msg = "EXECVE: LAM_U57, LAM mode should be disabled after execve\n",
	},
};

static void cmd_help(void)
{
	printf("usage: lam [-h] [-t test list]\n");
	printf("\t-t test list: run tests specified in the test list, default:0x%x\n", TEST_MASK);
	printf("\t\t0x1:malloc; 0x2:max_bits; 0x4:mmap; 0x8:syscall; 0x10:io_uring; 0x20:inherit; 0x40:pasid;\n");
	printf("\t-h: help\n");
}

/* Check for file existence */
uint8_t file_Exists(const char *fileName)
{
	struct stat buffer;

	uint8_t ret = (stat(fileName, &buffer) == 0);

	return ret;
}

/* Shell commands that configure the idxd sysfs entries for a shared DSA WQ */
const char *dsa_configs[] = {
	"echo 1 > /sys/bus/dsa/devices/dsa0/wq0.1/group_id",
	"echo shared > /sys/bus/dsa/devices/dsa0/wq0.1/mode",
	"echo 10 > /sys/bus/dsa/devices/dsa0/wq0.1/priority",
	"echo 16 > /sys/bus/dsa/devices/dsa0/wq0.1/size",
	"echo 15 > /sys/bus/dsa/devices/dsa0/wq0.1/threshold",
	"echo user > /sys/bus/dsa/devices/dsa0/wq0.1/type",
	"echo MyApp1 > /sys/bus/dsa/devices/dsa0/wq0.1/name",
	"echo 1 > /sys/bus/dsa/devices/dsa0/engine0.1/group_id",
	"echo dsa0 > /sys/bus/dsa/drivers/idxd/bind",
	/* Binding the WQ generates a device file under /dev */
	"echo wq0.1 > /sys/bus/dsa/drivers/user/bind",
};

/* DSA device file */
const char *dsaDeviceFile = "/dev/dsa/wq0.1";
/* Sysfs file that reports whether PASID is enabled */
const char *dsaPasidEnable = "/sys/bus/dsa/devices/dsa0/pasid_enabled";

/*
 * DSA depends on the kernel command line "intel_iommu=on,sm_on".
 * Returns the pasid_enabled value (0: disabled, 1: enabled).
 */
int Check_DSA_Kernel_Setting(void)
{
	char command[256] = "";
	char buf[256] = "";
	char *ptr;
	int rv = -1;

	snprintf(command, sizeof(command) - 1, "cat %s", dsaPasidEnable);

	FILE *cmd = popen(command, "r");

	if (cmd) {
		while (fgets(buf, sizeof(buf) - 1, cmd) != NULL)
			;

		pclose(cmd);
		rv = strtol(buf, &ptr, 16);
	}

	return rv;
}

/*
 * Configure DSA's sysfs entries as a shared work queue, which generates
 * the device file /dev/dsa/wq0.1.
 * Return: 0 OK; 1 failed; 3 skip (SVA disabled).
 */
int Dsa_Init_Sysfs(void)
{
	uint len = ARRAY_SIZE(dsa_configs);
	const char **p = dsa_configs;

	if (file_Exists(dsaDeviceFile) == 1)
		return 0;

	/* Check that the idxd driver is loaded */
	if (file_Exists(dsaPasidEnable) != 1) {
		printf("Please make sure idxd driver was loaded\n");
		return 3;
	}

	/* Check the SVA feature */
	if (Check_DSA_Kernel_Setting() != 1) {
		printf("Please enable SVA.(Add intel_iommu=on,sm_on in kernel cmdline)\n");
		return 3;
	}

	/* Apply the sysfs configuration commands */
	for (int i = 0; i < len; i++) {
		if (system(p[i]))
			return 1;
	}

	/* After configuration, /dev/dsa/wq0.1 should have been generated */
	return (file_Exists(dsaDeviceFile) != 1);
}

/*
 * Open the DSA device file and mmap it, which triggers
 * iommu_sva_alloc_pasid() in the kernel.
 */
void *allocate_dsa_pasid(void)
{
	int fd;
	void *wq;

	fd = open(dsaDeviceFile, O_RDWR);
	if (fd < 0) {
		perror("open");
		return MAP_FAILED;
	}

	wq = mmap(NULL, 0x1000, PROT_WRITE,
		  MAP_SHARED | MAP_POPULATE, fd, 0);
	if (wq == MAP_FAILED)
		perror("mmap");

	return wq;
}

int set_force_svm(void)
{
	int ret = 0;

	ret = syscall(SYS_arch_prctl, ARCH_FORCE_TAGGED_SVA);

	return ret;
}

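/*
 * Execute the three PASID test steps (enable LAM, allocate a PASID via
 * the DSA WQ, force tagged-address SVA) in the order encoded in
 * test->cmd, one nibble per step. Returns 0 only if every step ran
 * exactly once and succeeded.
 */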
int handle_pasid(struct testcases *test)
{
	uint tmp = test->cmd;
	uint runed = 0x0;
	int ret = 0;
	void *wq = NULL;

	ret = Dsa_Init_Sysfs();
	if (ret != 0)
		return ret;

	for (int i = 0; i < 3; i++) {
		int err = 0;

		if (tmp & 0x1) {
			/* Enable LAM */
			if ((runed & 0x1) == 0) {
				err = set_lam(LAM_U57_BITS);
				runed = runed | 0x1;
			} else
				err = 1;
		} else if (tmp & 0x4) {
			/* Force tagged-address SVA */
			if ((runed & 0x4) == 0) {
				err = set_force_svm();
				runed = runed | 0x4;
			} else
				err = 1;
		} else if (tmp & 0x2) {
			/* Allocate a PASID by mmapping the DSA WQ */
			if ((runed & 0x2) == 0) {
				runed = runed | 0x2;
				wq = allocate_dsa_pasid();
				if (wq == MAP_FAILED)
					err = 1;
			} else
				err = 1;
		}

		ret = ret + err;
		if (ret > 0)
			break;

		tmp = tmp >> 4;
	}

	if (wq != MAP_FAILED && wq != NULL)
		if (munmap(wq, 0x1000))
			printf("munmap failed %d\n", errno);

	if (runed != 0x7)
		ret = 1;

	return (ret != 0);
}

/*
 * The PASID tests depend on idxd and SVA; the kernel must be booted with
 * the command line options intel_iommu=on,sm_on.
 */
static struct testcases pasid_cases[] = {
	{
		.expected = 1,
		.cmd = PAS_CMD(LAM_CMD_BIT, PAS_CMD_BIT, SVA_CMD_BIT),
		.test_func = handle_pasid,
		.msg = "PASID: [Negative] Execute LAM, PASID, SVA in sequence\n",
	},
	{
		.expected = 0,
		.cmd = PAS_CMD(LAM_CMD_BIT, SVA_CMD_BIT, PAS_CMD_BIT),
		.test_func = handle_pasid,
		.msg = "PASID: Execute LAM, SVA, PASID in sequence\n",
	},
	{
		.expected = 1,
		.cmd = PAS_CMD(PAS_CMD_BIT, LAM_CMD_BIT, SVA_CMD_BIT),
		.test_func = handle_pasid,
		.msg = "PASID: [Negative] Execute PASID, LAM, SVA in sequence\n",
	},
	{
		.expected = 0,
		.cmd = PAS_CMD(PAS_CMD_BIT, SVA_CMD_BIT, LAM_CMD_BIT),
		.test_func = handle_pasid,
		.msg = "PASID: Execute PASID, SVA, LAM in sequence\n",
	},
	{
		.expected = 0,
		.cmd = PAS_CMD(SVA_CMD_BIT, LAM_CMD_BIT, PAS_CMD_BIT),
		.test_func = handle_pasid,
		.msg = "PASID: Execute SVA, LAM, PASID in sequence\n",
	},
	{
		.expected = 0,
		.cmd = PAS_CMD(SVA_CMD_BIT, PAS_CMD_BIT, LAM_CMD_BIT),
		.test_func = handle_pasid,
		.msg = "PASID: Execute SVA, PASID, LAM in sequence\n",
	},
};

int main(int argc, char **argv)
{
	int c = 0;
	unsigned int tests = TEST_MASK;

	tests_cnt = 0;

	if (!cpu_has_lam()) {
		ksft_print_msg("Unsupported LAM feature!\n");
		return -1;
	}

	while ((c = getopt(argc, argv, "ht:")) != -1) {
		switch (c) {
		case 't':
			tests = strtoul(optarg, NULL, 16);
			if (tests && !(tests & TEST_MASK)) {
				ksft_print_msg("Invalid argument!\n");
				return -1;
			}
			break;
		case 'h':
			cmd_help();
			return 0;
		default:
			ksft_print_msg("Invalid argument\n");
			return -1;
		}
	}

	/*
	 * "-t 0" is not a real test case; it is used by the execve test:
	 * the re-executed child reads back its LAM mode and reports it via
	 * the exit status so the parent can compare it with its own mode.
	 */
	if (!tests)
		return (get_lam());

	/* Run test cases */
	if (tests & FUNC_MALLOC)
		run_test(malloc_cases, ARRAY_SIZE(malloc_cases));

	if (tests & FUNC_BITS)
		run_test(bits_cases, ARRAY_SIZE(bits_cases));

	if (tests & FUNC_MMAP)
		run_test(mmap_cases, ARRAY_SIZE(mmap_cases));

	if (tests & FUNC_SYSCALL)
		run_test(syscall_cases, ARRAY_SIZE(syscall_cases));

	if (tests & FUNC_URING)
		run_test(uring_cases, ARRAY_SIZE(uring_cases));

	if (tests & FUNC_INHERITE)
		run_test(inheritance_cases, ARRAY_SIZE(inheritance_cases));

	if (tests & FUNC_PASID)
		run_test(pasid_cases, ARRAY_SIZE(pasid_cases));

	/* tests_cnt is accumulated by run_test(), so the plan is set at the end */
	ksft_set_plan(tests_cnt);

	return ksft_exit_pass();
}