1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (c) 2020 Collabora Ltd.
4 *
5 * Benchmark and test syscall user dispatch
6 */
7
8#define _GNU_SOURCE
9#include <stdio.h>
10#include <string.h>
11#include <stdlib.h>
12#include <signal.h>
13#include <errno.h>
14#include <time.h>
15#include <sys/time.h>
16#include <unistd.h>
17#include <sys/sysinfo.h>
18#include <sys/prctl.h>
19#include <sys/syscall.h>
20
/*
 * Fallback definitions, used only when the included headers do not
 * already define PR_SET_SYSCALL_USER_DISPATCH.
 */
#ifndef PR_SET_SYSCALL_USER_DISPATCH
# define PR_SET_SYSCALL_USER_DISPATCH	59
# define PR_SYS_DISPATCH_OFF	0
# define PR_SYS_DISPATCH_ON	1
# define SYSCALL_DISPATCH_FILTER_ALLOW	0
# define SYSCALL_DISPATCH_FILTER_BLOCK	1
#endif
28
29#ifdef __NR_syscalls
30# define MAGIC_SYSCALL_1 (__NR_syscalls + 1) /* Bad Linux syscall number */
31#else
32# define MAGIC_SYSCALL_1 (0xff00)  /* Bad Linux syscall number */
33#endif
34
35/*
36 * To test returning from a sigsys with selector blocked, the test
37 * requires some per-architecture support (i.e. knowledge about the
38 * signal trampoline address).  On i386, we know it is on the vdso, and
39 * a small trampoline is open-coded for x86_64.  Other architectures
40 * that have a trampoline in the vdso will support TEST_BLOCKED_RETURN
41 * out of the box, but don't enable them until they support syscall user
42 * dispatch.
43 */
44#if defined(__x86_64__) || defined(__i386__)
45#define TEST_BLOCKED_RETURN
46#endif
47
/*
 * Bounds of the region passed to prctl() in main() as the dispatcher
 * area.  On x86_64 the two symbols are labels emitted by the inline
 * assembly in handle_sigsys(); on every other architecture they are
 * plain variables initialised to zero.
 */
#ifdef __x86_64__
void* (syscall_dispatcher_start)(void);
void* (syscall_dispatcher_end)(void);
#else
unsigned long syscall_dispatcher_start = 0;
unsigned long syscall_dispatcher_end = 0;
#endif
55
/*
 * Counters updated by handle_sigsys(): trapped_call_count is bumped
 * when the trapped syscall number is MAGIC_SYSCALL_1, native_call_count
 * for any other trapped syscall.
 */
unsigned long trapped_call_count = 0;
unsigned long native_call_count = 0;

/*
 * Selector byte whose address is handed to prctl() in main().  The two
 * macros below store the BLOCK / ALLOW filter values into it.
 */
char selector;
#define SYSCALL_BLOCK   (selector = SYSCALL_DISPATCH_FILTER_BLOCK)
#define SYSCALL_UNBLOCK (selector = SYSCALL_DISPATCH_FILTER_ALLOW)

/* Number of sysinfo() calls timed by a single one_sysinfo_step() run. */
#define CALIBRATION_STEP 100000
/*
 * Amount added to 'factor' per calibration step; also printed by
 * calibrate_set() as the approximate target duration in seconds.
 */
#define CALIBRATE_TO_SECS 5
/* Number of steps perf_syscall() executes; computed by calibrate_set(). */
int factor;
66
67static double one_sysinfo_step(void)
68{
69	struct timespec t1, t2;
70	int i;
71	struct sysinfo info;
72
73	clock_gettime(CLOCK_MONOTONIC, &t1);
74	for (i = 0; i < CALIBRATION_STEP; i++)
75		sysinfo(&info);
76	clock_gettime(CLOCK_MONOTONIC, &t2);
77	return (t2.tv_sec - t1.tv_sec) + 1.0e-9 * (t2.tv_nsec - t1.tv_nsec);
78}
79
80static void calibrate_set(void)
81{
82	double elapsed = 0;
83
84	printf("Calibrating test set to last ~%d seconds...\n", CALIBRATE_TO_SECS);
85
86	while (elapsed < 1) {
87		elapsed += one_sysinfo_step();
88		factor += CALIBRATE_TO_SECS;
89	}
90
91	printf("test iterations = %d\n", CALIBRATION_STEP * factor);
92}
93
94static double perf_syscall(void)
95{
96	unsigned int i;
97	double partial = 0;
98
99	for (i = 0; i < factor; ++i)
100		partial += one_sysinfo_step()/(CALIBRATION_STEP*factor);
101	return partial;
102}
103
/*
 * SIGSYS handler installed by main().
 *
 * Sets the selector to ALLOW for the duration of the handler, logs the
 * trapped syscall number, and updates trapped_call_count or
 * native_call_count depending on whether that number is MAGIC_SYSCALL_1.
 * When TEST_BLOCKED_RETURN is defined the selector is set back to BLOCK
 * before leaving, so the return from the handler happens with
 * dispatching armed.
 *
 * @sig:      signal number (unused)
 * @info:     siginfo; only si_syscall is read
 * @ucontext: signal context (unused)
 */
static void handle_sigsys(int sig, siginfo_t *info, void *ucontext)
{
	char buf[1024];
	int len;

	/* Let the syscalls issued below (write) go through. */
	SYSCALL_UNBLOCK;

	/* printf and friends are not signal-safe. */
	len = snprintf(buf, 1024, "Caught sys_%x\n", info->si_syscall);
	write(1, buf, len);

	if (info->si_syscall == MAGIC_SYSCALL_1)
		trapped_call_count++;
	else
		native_call_count++;

#ifdef TEST_BLOCKED_RETURN
	SYSCALL_BLOCK;
#endif

#ifdef __x86_64__
	/*
	 * Open-coded signal-return trampoline.  The syscall instruction
	 * sits between the syscall_dispatcher_start/end labels, i.e. inside
	 * the region main() registers with prctl().
	 *
	 * NOTE(review): 0xf is expected to be the rt_sigreturn syscall
	 * number on x86_64; the leaveq / add $0x8 pair presumably tears
	 * down this function's frame and drops its return address, which
	 * assumes the compiler set up a frame pointer -- confirm against
	 * the build flags.  The statements must stay in exactly this order.
	 */
	__asm__ volatile("movq $0xf, %rax");
	__asm__ volatile("leaveq");
	__asm__ volatile("add $0x8, %rsp");
	__asm__ volatile("syscall_dispatcher_start:");
	__asm__ volatile("syscall");
	__asm__ volatile("nop"); /* Landing pad within dispatcher area */
	__asm__ volatile("syscall_dispatcher_end:");
#endif

}
135
136int main(void)
137{
138	struct sigaction act;
139	double time1, time2;
140	int ret;
141	sigset_t mask;
142
143	memset(&act, 0, sizeof(act));
144	sigemptyset(&mask);
145
146	act.sa_sigaction = handle_sigsys;
147	act.sa_flags = SA_SIGINFO;
148	act.sa_mask = mask;
149
150	calibrate_set();
151
152	time1 = perf_syscall();
153	printf("Avg syscall time %.0lfns.\n", time1 * 1.0e9);
154
155	ret = sigaction(SIGSYS, &act, NULL);
156	if (ret) {
157		perror("Error sigaction:");
158		exit(-1);
159	}
160
161	fprintf(stderr, "Enabling syscall trapping.\n");
162
163	if (prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON,
164		  syscall_dispatcher_start,
165		  (syscall_dispatcher_end - syscall_dispatcher_start + 1),
166		  &selector)) {
167		perror("prctl failed\n");
168		exit(-1);
169	}
170
171	SYSCALL_BLOCK;
172	syscall(MAGIC_SYSCALL_1);
173
174#ifdef TEST_BLOCKED_RETURN
175	if (selector == SYSCALL_DISPATCH_FILTER_ALLOW) {
176		fprintf(stderr, "Failed to return with selector blocked.\n");
177		exit(-1);
178	}
179#endif
180
181	SYSCALL_UNBLOCK;
182
183	if (!trapped_call_count) {
184		fprintf(stderr, "syscall trapping does not work.\n");
185		exit(-1);
186	}
187
188	time2 = perf_syscall();
189
190	if (native_call_count) {
191		perror("syscall trapping intercepted more syscalls than expected\n");
192		exit(-1);
193	}
194
195	printf("trapped_call_count %lu, native_call_count %lu.\n",
196	       trapped_call_count, native_call_count);
197	printf("Avg syscall time %.0lfns.\n", time2 * 1.0e9);
198	printf("Interception overhead: %.1lf%% (+%.0lfns).\n",
199	       100.0 * (time2 / time1 - 1.0), 1.0e9 * (time2 - time1));
200	return 0;
201
202}
203