1// SPDX-License-Identifier: GPL-2.0
2
3#define _GNU_SOURCE
4#include <errno.h>
5#include <fcntl.h>
6#include <linux/netlink.h>
7#include <signal.h>
8#include <stdbool.h>
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <sys/prctl.h>
13#include <sys/socket.h>
14#include <sched.h>
15#include <sys/eventfd.h>
16#include <sys/stat.h>
17#include <sys/syscall.h>
18#include <sys/types.h>
19#include <sys/wait.h>
20#include <unistd.h>
21
22#include "../kselftest_harness.h"
23
/* sysfs uevent file of the "full" mem device; trigger_uevent() writes to it. */
#define __DEV_FULL "/sys/devices/virtual/mem/full/uevent"
/* Size of the receive buffer and of the SO_RCVBUF request. */
#define __UEVENT_BUFFER_SIZE (2048 * 2)
/* Leading bytes of the uevent datagram the listener waits for. */
#define __UEVENT_HEADER "add@/devices/virtual/mem/full"
/*
 * Derived from the header itself so both can never drift apart. The
 * terminating NUL is counted on purpose: the listener compares it as well.
 */
#define __UEVENT_HEADER_LEN sizeof(__UEVENT_HEADER)
/* Value stored in nl_groups; parenthesized so it is safe in any expression. */
#define __UEVENT_LISTEN_ALL (-1)
29
/*
 * read(2) wrapper that transparently restarts the call whenever it is
 * interrupted by a signal (EINTR).
 *
 * Returns whatever the final read() returned: the number of bytes read,
 * 0 on end-of-file, or -1 with errno set to something other than EINTR.
 */
ssize_t read_nointr(int fd, void *buf, size_t count)
{
	ssize_t nread;

	do {
		nread = read(fd, buf, count);
	} while (nread < 0 && errno == EINTR);

	return nread;
}
41
/*
 * write(2) wrapper that transparently restarts the call whenever it is
 * interrupted by a signal (EINTR).
 *
 * Returns whatever the final write() returned: the number of bytes
 * written (possibly fewer than @count), or -1 with errno set to something
 * other than EINTR.
 */
ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	ssize_t nwritten;

	do {
		nwritten = write(fd, buf, count);
	} while (nwritten < 0 && errno == EINTR);

	return nwritten;
}
53
/*
 * Reap the child @pid and report whether it terminated cleanly.
 *
 * waitpid() is retried when it is interrupted by a signal or when it
 * reports a pid other than @pid.
 *
 * Returns 0 only if the child exited normally with status 0. Returns -1
 * if waitpid() fails for any reason other than EINTR, if the child did
 * not exit normally, or if it exited with a non-zero status.
 */
int wait_for_pid(pid_t pid)
{
	int status;

	for (;;) {
		int reaped = waitpid(pid, &status, 0);

		if (reaped == -1) {
			if (errno != EINTR)
				return -1;
			continue;
		}

		if (reaped == pid)
			break;
	}

	return (WIFEXITED(status) && WEXITSTATUS(status) == 0) ? 0 : -1;
}
75
76static int uevent_listener(unsigned long post_flags, bool expect_uevent,
77			   int sync_fd)
78{
79	int sk_fd, ret;
80	socklen_t sk_addr_len;
81	int rcv_buf_sz = __UEVENT_BUFFER_SIZE;
82	uint64_t sync_add = 1;
83	struct sockaddr_nl sk_addr = { 0 }, rcv_addr = { 0 };
84	char buf[__UEVENT_BUFFER_SIZE] = { 0 };
85	struct iovec iov = { buf, __UEVENT_BUFFER_SIZE };
86	char control[CMSG_SPACE(sizeof(struct ucred))];
87	struct msghdr hdr = {
88		&rcv_addr, sizeof(rcv_addr), &iov, 1,
89		control,   sizeof(control),  0,
90	};
91
92	sk_fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC,
93		       NETLINK_KOBJECT_UEVENT);
94	if (sk_fd < 0) {
95		fprintf(stderr, "%s - Failed to open uevent socket\n", strerror(errno));
96		return -1;
97	}
98
99	ret = setsockopt(sk_fd, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz,
100			 sizeof(rcv_buf_sz));
101	if (ret < 0) {
102		fprintf(stderr, "%s - Failed to set socket options\n", strerror(errno));
103		goto on_error;
104	}
105
106	sk_addr.nl_family = AF_NETLINK;
107	sk_addr.nl_groups = __UEVENT_LISTEN_ALL;
108
109	sk_addr_len = sizeof(sk_addr);
110	ret = bind(sk_fd, (struct sockaddr *)&sk_addr, sk_addr_len);
111	if (ret < 0) {
112		fprintf(stderr, "%s - Failed to bind socket\n", strerror(errno));
113		goto on_error;
114	}
115
116	ret = getsockname(sk_fd, (struct sockaddr *)&sk_addr, &sk_addr_len);
117	if (ret < 0) {
118		fprintf(stderr, "%s - Failed to retrieve socket name\n", strerror(errno));
119		goto on_error;
120	}
121
122	if ((size_t)sk_addr_len != sizeof(sk_addr)) {
123		fprintf(stderr, "Invalid socket address size\n");
124		ret = -1;
125		goto on_error;
126	}
127
128	if (post_flags & CLONE_NEWUSER) {
129		ret = unshare(CLONE_NEWUSER);
130		if (ret < 0) {
131			fprintf(stderr,
132				"%s - Failed to unshare user namespace\n",
133				strerror(errno));
134			goto on_error;
135		}
136	}
137
138	if (post_flags & CLONE_NEWNET) {
139		ret = unshare(CLONE_NEWNET);
140		if (ret < 0) {
141			fprintf(stderr,
142				"%s - Failed to unshare network namespace\n",
143				strerror(errno));
144			goto on_error;
145		}
146	}
147
148	ret = write_nointr(sync_fd, &sync_add, sizeof(sync_add));
149	close(sync_fd);
150	if (ret != sizeof(sync_add)) {
151		ret = -1;
152		fprintf(stderr, "Failed to synchronize with parent process\n");
153		goto on_error;
154	}
155
156	ret = 0;
157	for (;;) {
158		ssize_t r;
159
160		r = recvmsg(sk_fd, &hdr, 0);
161		if (r <= 0) {
162			fprintf(stderr, "%s - Failed to receive uevent\n", strerror(errno));
163			ret = -1;
164			break;
165		}
166
167		/* ignore libudev messages */
168		if (memcmp(buf, "libudev", 8) == 0)
169			continue;
170
171		/* ignore uevents we didn't trigger */
172		if (memcmp(buf, __UEVENT_HEADER, __UEVENT_HEADER_LEN) != 0)
173			continue;
174
175		if (!expect_uevent) {
176			fprintf(stderr, "Received unexpected uevent:\n");
177			ret = -1;
178		}
179
180		if (TH_LOG_ENABLED) {
181			/* If logging is enabled dump the received uevent. */
182			(void)write_nointr(STDERR_FILENO, buf, r);
183			(void)write_nointr(STDERR_FILENO, "\n", 1);
184		}
185
186		break;
187	}
188
189on_error:
190	close(sk_fd);
191
192	return ret;
193}
194
195int trigger_uevent(unsigned int times)
196{
197	int fd, ret;
198	unsigned int i;
199
200	fd = open(__DEV_FULL, O_RDWR | O_CLOEXEC);
201	if (fd < 0) {
202		if (errno != ENOENT)
203			return -EINVAL;
204
205		return -1;
206	}
207
208	for (i = 0; i < times; i++) {
209		ret = write_nointr(fd, "add\n", sizeof("add\n") - 1);
210		if (ret < 0) {
211			fprintf(stderr, "Failed to trigger uevent\n");
212			break;
213		}
214	}
215	close(fd);
216
217	return ret;
218}
219
/*
 * Ask the kernel to send SIGKILL to the calling process when its parent
 * dies.
 *
 * If the parent pid already reads as 1 after the request, the process is
 * treated as orphaned and sends SIGKILL to itself.
 *
 * Returns 0 on success, -1 if the last call made (prctl(), or kill() in
 * the orphaned case) failed.
 */
int set_death_signal(void)
{
	int rc = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);

	/* Check whether we have been orphaned. */
	if (getppid() == 1)
		rc = kill(getpid(), SIGKILL);

	return rc < 0 ? -1 : 0;
}
241
/*
 * Run one uevent filtering scenario.
 *
 * A child is forked that optionally unshares namespaces (@pre_flags),
 * opens a uevent socket, optionally unshares namespaces again
 * (@post_flags) and then waits for a uevent; see uevent_listener(). Once
 * the child has signalled readiness through @sync_fd the parent triggers
 * uevents and gives the child two seconds to exit on its own.
 *
 * @pre_flags:     CLONE_NEWUSER/CLONE_NEWNET unshared before the socket is
 *                 opened.
 * @post_flags:    CLONE_NEWUSER/CLONE_NEWNET unshared after the socket is
 *                 bound.
 * @expect_uevent: whether the child is expected to receive the uevent.
 * @sync_fd:       descriptor the child writes to and the parent reads from
 *                 to synchronize.
 *
 * If the child is still running after the timeout it is sent SIGTERM when
 * no uevent was expected and SIGUSR1 otherwise; the "success"/"error"
 * outcome then follows from how the child terminates.
 *
 * SIGCHLD is blocked for the duration of the call. Before returning, any
 * SIGCHLD still pending is consumed and the caller's signal mask is
 * restored, so that a leftover SIGCHLD from this child cannot cut short
 * the timed wait of a later call.
 *
 * Returns 0 if the child exited with status 0, -1 otherwise.
 */
static int do_test(unsigned long pre_flags, unsigned long post_flags,
		   bool expect_uevent, int sync_fd)
{
	int ret;
	int result = -1;
	uint64_t wait_val;
	pid_t pid;
	sigset_t mask;
	sigset_t orig_mask;
	struct timespec timeout;
	const struct timespec no_wait = { 0, 0 };

	sigemptyset(&mask);
	sigaddset(&mask, SIGCHLD);

	ret = sigprocmask(SIG_BLOCK, &mask, &orig_mask);
	if (ret < 0) {
		fprintf(stderr, "%s- Failed to block SIGCHLD\n", strerror(errno));
		return -1;
	}

	pid = fork();
	if (pid < 0) {
		fprintf(stderr, "%s - Failed to fork() new process\n", strerror(errno));
		goto out_restore_mask;
	}

	if (pid == 0) {
		/* Make sure that we go away when our parent dies. */
		ret = set_death_signal();
		if (ret < 0) {
			fprintf(stderr, "Failed to set PR_SET_PDEATHSIG to SIGKILL\n");
			_exit(EXIT_FAILURE);
		}

		if (pre_flags & CLONE_NEWUSER) {
			ret = unshare(CLONE_NEWUSER);
			if (ret < 0) {
				fprintf(stderr,
					"%s - Failed to unshare user namespace\n",
					strerror(errno));
				_exit(EXIT_FAILURE);
			}
		}

		if (pre_flags & CLONE_NEWNET) {
			ret = unshare(CLONE_NEWNET);
			if (ret < 0) {
				fprintf(stderr,
					"%s - Failed to unshare network namespace\n",
					strerror(errno));
				_exit(EXIT_FAILURE);
			}
		}

		if (uevent_listener(post_flags, expect_uevent, sync_fd) < 0)
			_exit(EXIT_FAILURE);

		_exit(EXIT_SUCCESS);
	}

	/*
	 * NOTE(review): this read has no timeout. If the child fails before
	 * it writes to sync_fd the parent presumably blocks here for good;
	 * confirm and consider waiting with a timeout.
	 */
	ret = read_nointr(sync_fd, &wait_val, sizeof(wait_val));
	if (ret != sizeof(wait_val)) {
		fprintf(stderr, "Failed to synchronize with child process\n");
		_exit(EXIT_FAILURE);
	}

	/* Trigger 10 uevents to account for the case where the kernel might
	 * drop some.
	 *
	 * NOTE(review): a trigger failure is only logged. When no uevent is
	 * expected the scenario can then still report success without any
	 * uevent having been sent; confirm whether this should fail the test.
	 */
	ret = trigger_uevent(10);
	if (ret < 0)
		fprintf(stderr, "Failed triggering uevents\n");

	/* Wait for 2 seconds before considering this failed. This should be
	 * plenty of time for the kernel to deliver the uevent even under heavy
	 * load.
	 */
	timeout.tv_sec = 2;
	timeout.tv_nsec = 0;

again:
	ret = sigtimedwait(&mask, NULL, &timeout);
	if (ret < 0) {
		if (errno == EINTR)
			goto again;

		if (!expect_uevent)
			ret = kill(pid, SIGTERM); /* success */
		else
			ret = kill(pid, SIGUSR1); /* error */
		if (ret < 0)
			goto out_restore_mask;
	}

	result = wait_for_pid(pid);

	/*
	 * When the child was terminated by us after the timeout, its SIGCHLD
	 * has not been consumed and stays pending while blocked. Reaping the
	 * child does not clear it, so collect it here; otherwise the next
	 * sigtimedwait() on SIGCHLD would return immediately.
	 */
	do {
		ret = sigtimedwait(&mask, NULL, &no_wait);
	} while (ret < 0 && errno == EINTR);

out_restore_mask:
	/* Undo the SIG_BLOCK from above; nothing useful to do on failure. */
	(void)sigprocmask(SIG_SETMASK, &orig_mask, NULL);

	return result;
}
341
/*
 * Signal handler that terminates the process immediately: exit status
 * EXIT_SUCCESS for SIGTERM, EXIT_FAILURE for any other signal.
 */
static void signal_handler(int sig)
{
	_exit(sig == SIGTERM ? EXIT_SUCCESS : EXIT_FAILURE);
}
349
350TEST(uevent_filtering)
351{
352	int ret, sync_fd;
353	struct sigaction act;
354
355	if (geteuid()) {
356		TH_LOG("Uevent filtering tests require root privileges. Skipping test");
357		_exit(KSFT_SKIP);
358	}
359
360	ret = access(__DEV_FULL, F_OK);
361	EXPECT_EQ(0, ret) {
362		if (errno == ENOENT) {
363			TH_LOG(__DEV_FULL " does not exist. Skipping test");
364			_exit(KSFT_SKIP);
365		}
366
367		_exit(KSFT_FAIL);
368	}
369
370	act.sa_handler = signal_handler;
371	act.sa_flags = 0;
372	sigemptyset(&act.sa_mask);
373
374	ret = sigaction(SIGTERM, &act, NULL);
375	ASSERT_EQ(0, ret);
376
377	sync_fd = eventfd(0, EFD_CLOEXEC);
378	ASSERT_GE(sync_fd, 0);
379
380	/*
381	 * Setup:
382	 * - Open uevent listening socket in initial network namespace owned by
383	 *   initial user namespace.
384	 * - Trigger uevent in initial network namespace owned by initial user
385	 *   namespace.
386	 * Expected Result:
387	 * - uevent listening socket receives uevent
388	 */
389	ret = do_test(0, 0, true, sync_fd);
390	ASSERT_EQ(0, ret) {
391		goto do_cleanup;
392	}
393
394	/*
395	 * Setup:
396	 * - Open uevent listening socket in non-initial network namespace
397	 *   owned by initial user namespace.
398	 * - Trigger uevent in initial network namespace owned by initial user
399	 *   namespace.
400	 * Expected Result:
401	 * - uevent listening socket receives uevent
402	 */
403	ret = do_test(CLONE_NEWNET, 0, true, sync_fd);
404	ASSERT_EQ(0, ret) {
405		goto do_cleanup;
406	}
407
408	/*
409	 * Setup:
410	 * - unshare user namespace
411	 * - Open uevent listening socket in initial network namespace
412	 *   owned by initial user namespace.
413	 * - Trigger uevent in initial network namespace owned by initial user
414	 *   namespace.
415	 * Expected Result:
416	 * - uevent listening socket receives uevent
417	 */
418	ret = do_test(CLONE_NEWUSER, 0, true, sync_fd);
419	ASSERT_EQ(0, ret) {
420		goto do_cleanup;
421	}
422
423	/*
424	 * Setup:
425	 * - Open uevent listening socket in non-initial network namespace
426	 *   owned by non-initial user namespace.
427	 * - Trigger uevent in initial network namespace owned by initial user
428	 *   namespace.
429	 * Expected Result:
430	 * - uevent listening socket receives no uevent
431	 */
432	ret = do_test(CLONE_NEWUSER | CLONE_NEWNET, 0, false, sync_fd);
433	ASSERT_EQ(0, ret) {
434		goto do_cleanup;
435	}
436
437	/*
438	 * Setup:
439	 * - Open uevent listening socket in initial network namespace
440	 *   owned by initial user namespace.
441	 * - unshare network namespace
442	 * - Trigger uevent in initial network namespace owned by initial user
443	 *   namespace.
444	 * Expected Result:
445	 * - uevent listening socket receives uevent
446	 */
447	ret = do_test(0, CLONE_NEWNET, true, sync_fd);
448	ASSERT_EQ(0, ret) {
449		goto do_cleanup;
450	}
451
452	/*
453	 * Setup:
454	 * - Open uevent listening socket in initial network namespace
455	 *   owned by initial user namespace.
456	 * - unshare user namespace
457	 * - Trigger uevent in initial network namespace owned by initial user
458	 *   namespace.
459	 * Expected Result:
460	 * - uevent listening socket receives uevent
461	 */
462	ret = do_test(0, CLONE_NEWUSER, true, sync_fd);
463	ASSERT_EQ(0, ret) {
464		goto do_cleanup;
465	}
466
467	/*
468	 * Setup:
469	 * - Open uevent listening socket in initial network namespace
470	 *   owned by initial user namespace.
471	 * - unshare user namespace
472	 * - unshare network namespace
473	 * - Trigger uevent in initial network namespace owned by initial user
474	 *   namespace.
475	 * Expected Result:
476	 * - uevent listening socket receives uevent
477	 */
478	ret = do_test(0, CLONE_NEWUSER | CLONE_NEWNET, true, sync_fd);
479	ASSERT_EQ(0, ret) {
480		goto do_cleanup;
481	}
482
483do_cleanup:
484	close(sync_fd);
485}
486
487TEST_HARNESS_MAIN
488