1// SPDX-License-Identifier: GPL-2.0
2
3#define _GNU_SOURCE
4#include <errno.h>
5#include <fcntl.h>
6#include <linux/kernel.h>
7#include <limits.h>
8#include <stdbool.h>
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <syscall.h>
13#include <unistd.h>
14#include <sys/resource.h>
15#include <linux/close_range.h>
16
17#include "../kselftest_harness.h"
18#include "../clone3/clone3_selftests.h"
19
/*
 * Thin wrapper around the raw close_range() system call.
 *
 * @fd, @max_fd and @flags are handed to __NR_close_range unchanged. The
 * value returned by syscall() is returned to the caller, narrowed to int.
 */
static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
				  unsigned int flags)
{
	long rc = syscall(__NR_close_range, fd, max_fd, flags);

	return rc;
}
25
26TEST(core_close_range)
27{
28	int i, ret;
29	int open_fds[101];
30
31	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
32		int fd;
33
34		fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
35		ASSERT_GE(fd, 0) {
36			if (errno == ENOENT)
37				SKIP(return, "Skipping test since /dev/null does not exist");
38		}
39
40		open_fds[i] = fd;
41	}
42
43	EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) {
44		if (errno == ENOSYS)
45			SKIP(return, "close_range() syscall not supported");
46	}
47
48	EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0));
49
50	for (i = 0; i <= 50; i++)
51		EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
52
53	for (i = 51; i <= 100; i++)
54		EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
55
56	/* create a couple of gaps */
57	close(57);
58	close(78);
59	close(81);
60	close(82);
61	close(84);
62	close(90);
63
64	EXPECT_EQ(0, sys_close_range(open_fds[51], open_fds[92], 0));
65
66	for (i = 51; i <= 92; i++)
67		EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
68
69	for (i = 93; i <= 100; i++)
70		EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
71
72	/* test that the kernel caps and still closes all fds */
73	EXPECT_EQ(0, sys_close_range(open_fds[93], open_fds[99], 0));
74
75	for (i = 93; i <= 99; i++)
76		EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
77
78	EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
79
80	EXPECT_EQ(0, sys_close_range(open_fds[100], open_fds[100], 0));
81
82	EXPECT_EQ(-1, fcntl(open_fds[100], F_GETFL));
83}
84
85TEST(close_range_unshare)
86{
87	int i, ret, status;
88	pid_t pid;
89	int open_fds[101];
90	struct __clone_args args = {
91		.flags = CLONE_FILES,
92		.exit_signal = SIGCHLD,
93	};
94
95	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
96		int fd;
97
98		fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
99		ASSERT_GE(fd, 0) {
100			if (errno == ENOENT)
101				SKIP(return, "Skipping test since /dev/null does not exist");
102		}
103
104		open_fds[i] = fd;
105	}
106
107	pid = sys_clone3(&args, sizeof(args));
108	ASSERT_GE(pid, 0);
109
110	if (pid == 0) {
111		ret = sys_close_range(open_fds[0], open_fds[50],
112				      CLOSE_RANGE_UNSHARE);
113		if (ret)
114			exit(EXIT_FAILURE);
115
116		for (i = 0; i <= 50; i++)
117			if (fcntl(open_fds[i], F_GETFL) != -1)
118				exit(EXIT_FAILURE);
119
120		for (i = 51; i <= 100; i++)
121			if (fcntl(open_fds[i], F_GETFL) == -1)
122				exit(EXIT_FAILURE);
123
124		/* create a couple of gaps */
125		close(57);
126		close(78);
127		close(81);
128		close(82);
129		close(84);
130		close(90);
131
132		ret = sys_close_range(open_fds[51], open_fds[92],
133				      CLOSE_RANGE_UNSHARE);
134		if (ret)
135			exit(EXIT_FAILURE);
136
137		for (i = 51; i <= 92; i++)
138			if (fcntl(open_fds[i], F_GETFL) != -1)
139				exit(EXIT_FAILURE);
140
141		for (i = 93; i <= 100; i++)
142			if (fcntl(open_fds[i], F_GETFL) == -1)
143				exit(EXIT_FAILURE);
144
145		/* test that the kernel caps and still closes all fds */
146		ret = sys_close_range(open_fds[93], open_fds[99],
147				      CLOSE_RANGE_UNSHARE);
148		if (ret)
149			exit(EXIT_FAILURE);
150
151		for (i = 93; i <= 99; i++)
152			if (fcntl(open_fds[i], F_GETFL) != -1)
153				exit(EXIT_FAILURE);
154
155		if (fcntl(open_fds[100], F_GETFL) == -1)
156			exit(EXIT_FAILURE);
157
158		ret = sys_close_range(open_fds[100], open_fds[100],
159				      CLOSE_RANGE_UNSHARE);
160		if (ret)
161			exit(EXIT_FAILURE);
162
163		if (fcntl(open_fds[100], F_GETFL) != -1)
164			exit(EXIT_FAILURE);
165
166		exit(EXIT_SUCCESS);
167	}
168
169	EXPECT_EQ(waitpid(pid, &status, 0), pid);
170	EXPECT_EQ(true, WIFEXITED(status));
171	EXPECT_EQ(0, WEXITSTATUS(status));
172}
173
174TEST(close_range_unshare_capped)
175{
176	int i, ret, status;
177	pid_t pid;
178	int open_fds[101];
179	struct __clone_args args = {
180		.flags = CLONE_FILES,
181		.exit_signal = SIGCHLD,
182	};
183
184	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
185		int fd;
186
187		fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
188		ASSERT_GE(fd, 0) {
189			if (errno == ENOENT)
190				SKIP(return, "Skipping test since /dev/null does not exist");
191		}
192
193		open_fds[i] = fd;
194	}
195
196	pid = sys_clone3(&args, sizeof(args));
197	ASSERT_GE(pid, 0);
198
199	if (pid == 0) {
200		ret = sys_close_range(open_fds[0], UINT_MAX,
201				      CLOSE_RANGE_UNSHARE);
202		if (ret)
203			exit(EXIT_FAILURE);
204
205		for (i = 0; i <= 100; i++)
206			if (fcntl(open_fds[i], F_GETFL) != -1)
207				exit(EXIT_FAILURE);
208
209		exit(EXIT_SUCCESS);
210	}
211
212	EXPECT_EQ(waitpid(pid, &status, 0), pid);
213	EXPECT_EQ(true, WIFEXITED(status));
214	EXPECT_EQ(0, WEXITSTATUS(status));
215}
216
217TEST(close_range_cloexec)
218{
219	int i, ret;
220	int open_fds[101];
221	struct rlimit rlimit;
222
223	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
224		int fd;
225
226		fd = open("/dev/null", O_RDONLY);
227		ASSERT_GE(fd, 0) {
228			if (errno == ENOENT)
229				SKIP(return, "Skipping test since /dev/null does not exist");
230		}
231
232		open_fds[i] = fd;
233	}
234
235	ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
236	if (ret < 0) {
237		if (errno == ENOSYS)
238			SKIP(return, "close_range() syscall not supported");
239		if (errno == EINVAL)
240			SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
241	}
242
243	/* Ensure the FD_CLOEXEC bit is set also with a resource limit in place.  */
244	ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
245	rlimit.rlim_cur = 25;
246	ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
247
248	/* Set close-on-exec for two ranges: [0-50] and [75-100].  */
249	ret = sys_close_range(open_fds[0], open_fds[50], CLOSE_RANGE_CLOEXEC);
250	ASSERT_EQ(0, ret);
251	ret = sys_close_range(open_fds[75], open_fds[100], CLOSE_RANGE_CLOEXEC);
252	ASSERT_EQ(0, ret);
253
254	for (i = 0; i <= 50; i++) {
255		int flags = fcntl(open_fds[i], F_GETFD);
256
257		EXPECT_GT(flags, -1);
258		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
259	}
260
261	for (i = 51; i <= 74; i++) {
262		int flags = fcntl(open_fds[i], F_GETFD);
263
264		EXPECT_GT(flags, -1);
265		EXPECT_EQ(flags & FD_CLOEXEC, 0);
266	}
267
268	for (i = 75; i <= 100; i++) {
269		int flags = fcntl(open_fds[i], F_GETFD);
270
271		EXPECT_GT(flags, -1);
272		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
273	}
274
275	/* Test a common pattern.  */
276	ret = sys_close_range(3, UINT_MAX, CLOSE_RANGE_CLOEXEC);
277	for (i = 0; i <= 100; i++) {
278		int flags = fcntl(open_fds[i], F_GETFD);
279
280		EXPECT_GT(flags, -1);
281		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
282	}
283}
284
285TEST(close_range_cloexec_unshare)
286{
287	int i, ret;
288	int open_fds[101];
289	struct rlimit rlimit;
290
291	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
292		int fd;
293
294		fd = open("/dev/null", O_RDONLY);
295		ASSERT_GE(fd, 0) {
296			if (errno == ENOENT)
297				SKIP(return, "Skipping test since /dev/null does not exist");
298		}
299
300		open_fds[i] = fd;
301	}
302
303	ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
304	if (ret < 0) {
305		if (errno == ENOSYS)
306			SKIP(return, "close_range() syscall not supported");
307		if (errno == EINVAL)
308			SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
309	}
310
311	/* Ensure the FD_CLOEXEC bit is set also with a resource limit in place.  */
312	ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
313	rlimit.rlim_cur = 25;
314	ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
315
316	/* Set close-on-exec for two ranges: [0-50] and [75-100].  */
317	ret = sys_close_range(open_fds[0], open_fds[50],
318			      CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
319	ASSERT_EQ(0, ret);
320	ret = sys_close_range(open_fds[75], open_fds[100],
321			      CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
322	ASSERT_EQ(0, ret);
323
324	for (i = 0; i <= 50; i++) {
325		int flags = fcntl(open_fds[i], F_GETFD);
326
327		EXPECT_GT(flags, -1);
328		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
329	}
330
331	for (i = 51; i <= 74; i++) {
332		int flags = fcntl(open_fds[i], F_GETFD);
333
334		EXPECT_GT(flags, -1);
335		EXPECT_EQ(flags & FD_CLOEXEC, 0);
336	}
337
338	for (i = 75; i <= 100; i++) {
339		int flags = fcntl(open_fds[i], F_GETFD);
340
341		EXPECT_GT(flags, -1);
342		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
343	}
344
345	/* Test a common pattern.  */
346	ret = sys_close_range(3, UINT_MAX,
347			      CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
348	for (i = 0; i <= 100; i++) {
349		int flags = fcntl(open_fds[i], F_GETFD);
350
351		EXPECT_GT(flags, -1);
352		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
353	}
354}
355
356/*
357 * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
358 */
359TEST(close_range_cloexec_syzbot)
360{
361	int fd1, fd2, fd3, flags, ret, status;
362	pid_t pid;
363	struct __clone_args args = {
364		.flags = CLONE_FILES,
365		.exit_signal = SIGCHLD,
366	};
367
368	/* Create a huge gap in the fd table. */
369	fd1 = open("/dev/null", O_RDWR);
370	EXPECT_GT(fd1, 0);
371
372	fd2 = dup2(fd1, 1000);
373	EXPECT_GT(fd2, 0);
374
375	pid = sys_clone3(&args, sizeof(args));
376	ASSERT_GE(pid, 0);
377
378	if (pid == 0) {
379		ret = sys_close_range(3, ~0U, CLOSE_RANGE_CLOEXEC);
380		if (ret)
381			exit(EXIT_FAILURE);
382
383		/*
384			 * We now have a private file descriptor table and all
385			 * our open fds should still be open but made
386			 * close-on-exec.
387			 */
388		flags = fcntl(fd1, F_GETFD);
389		EXPECT_GT(flags, -1);
390		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
391
392		flags = fcntl(fd2, F_GETFD);
393		EXPECT_GT(flags, -1);
394		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
395
396		fd3 = dup2(fd1, 42);
397		EXPECT_GT(fd3, 0);
398
399		/*
400			 * Duplicating the file descriptor must remove the
401			 * FD_CLOEXEC flag.
402			 */
403		flags = fcntl(fd3, F_GETFD);
404		EXPECT_GT(flags, -1);
405		EXPECT_EQ(flags & FD_CLOEXEC, 0);
406
407		exit(EXIT_SUCCESS);
408	}
409
410	EXPECT_EQ(waitpid(pid, &status, 0), pid);
411	EXPECT_EQ(true, WIFEXITED(status));
412	EXPECT_EQ(0, WEXITSTATUS(status));
413
414	/*
415	 * We had a shared file descriptor table before along with requesting
416	 * close-on-exec so the original fds must not be close-on-exec.
417	 */
418	flags = fcntl(fd1, F_GETFD);
419	EXPECT_GT(flags, -1);
420	EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
421
422	flags = fcntl(fd2, F_GETFD);
423	EXPECT_GT(flags, -1);
424	EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
425
426	fd3 = dup2(fd1, 42);
427	EXPECT_GT(fd3, 0);
428
429	flags = fcntl(fd3, F_GETFD);
430	EXPECT_GT(flags, -1);
431	EXPECT_EQ(flags & FD_CLOEXEC, 0);
432
433	EXPECT_EQ(close(fd1), 0);
434	EXPECT_EQ(close(fd2), 0);
435	EXPECT_EQ(close(fd3), 0);
436}
437
438/*
439 * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
440 */
441TEST(close_range_cloexec_unshare_syzbot)
442{
443	int i, fd1, fd2, fd3, flags, ret, status;
444	pid_t pid;
445	struct __clone_args args = {
446		.flags = CLONE_FILES,
447		.exit_signal = SIGCHLD,
448	};
449
450	/*
451	 * Create a huge gap in the fd table. When we now call
452	 * CLOSE_RANGE_UNSHARE with a shared fd table and and with ~0U as upper
453	 * bound the kernel will only copy up to fd1 file descriptors into the
454	 * new fd table. If the kernel is buggy and doesn't handle
455	 * CLOSE_RANGE_CLOEXEC correctly it will not have copied all file
456	 * descriptors and we will oops!
457	 *
458	 * On a buggy kernel this should immediately oops. But let's loop just
459	 * to be sure.
460	 */
461	fd1 = open("/dev/null", O_RDWR);
462	EXPECT_GT(fd1, 0);
463
464	fd2 = dup2(fd1, 1000);
465	EXPECT_GT(fd2, 0);
466
467	for (i = 0; i < 100; i++) {
468
469		pid = sys_clone3(&args, sizeof(args));
470		ASSERT_GE(pid, 0);
471
472		if (pid == 0) {
473			ret = sys_close_range(3, ~0U, CLOSE_RANGE_UNSHARE |
474						      CLOSE_RANGE_CLOEXEC);
475			if (ret)
476				exit(EXIT_FAILURE);
477
478			/*
479			 * We now have a private file descriptor table and all
480			 * our open fds should still be open but made
481			 * close-on-exec.
482			 */
483			flags = fcntl(fd1, F_GETFD);
484			EXPECT_GT(flags, -1);
485			EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
486
487			flags = fcntl(fd2, F_GETFD);
488			EXPECT_GT(flags, -1);
489			EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
490
491			fd3 = dup2(fd1, 42);
492			EXPECT_GT(fd3, 0);
493
494			/*
495			 * Duplicating the file descriptor must remove the
496			 * FD_CLOEXEC flag.
497			 */
498			flags = fcntl(fd3, F_GETFD);
499			EXPECT_GT(flags, -1);
500			EXPECT_EQ(flags & FD_CLOEXEC, 0);
501
502			EXPECT_EQ(close(fd1), 0);
503			EXPECT_EQ(close(fd2), 0);
504			EXPECT_EQ(close(fd3), 0);
505
506			exit(EXIT_SUCCESS);
507		}
508
509		EXPECT_EQ(waitpid(pid, &status, 0), pid);
510		EXPECT_EQ(true, WIFEXITED(status));
511		EXPECT_EQ(0, WEXITSTATUS(status));
512	}
513
514	/*
515	 * We created a private file descriptor table before along with
516	 * requesting close-on-exec so the original fds must not be
517	 * close-on-exec.
518	 */
519	flags = fcntl(fd1, F_GETFD);
520	EXPECT_GT(flags, -1);
521	EXPECT_EQ(flags & FD_CLOEXEC, 0);
522
523	flags = fcntl(fd2, F_GETFD);
524	EXPECT_GT(flags, -1);
525	EXPECT_EQ(flags & FD_CLOEXEC, 0);
526
527	fd3 = dup2(fd1, 42);
528	EXPECT_GT(fd3, 0);
529
530	flags = fcntl(fd3, F_GETFD);
531	EXPECT_GT(flags, -1);
532	EXPECT_EQ(flags & FD_CLOEXEC, 0);
533
534	EXPECT_EQ(close(fd1), 0);
535	EXPECT_EQ(close(fd2), 0);
536	EXPECT_EQ(close(fd3), 0);
537}
538
539TEST_HARNESS_MAIN
540