1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * Based on Christian Brauner's clone3() example.
 * These tests assume that they are running in the host's
 * PID namespace.
7 */
8
9#define _GNU_SOURCE
10#include <errno.h>
11#include <linux/types.h>
12#include <linux/sched.h>
13#include <stdio.h>
14#include <stdlib.h>
15#include <stdbool.h>
16#include <sys/syscall.h>
17#include <sys/types.h>
18#include <sys/un.h>
19#include <sys/wait.h>
20#include <unistd.h>
21#include <sched.h>
22
23#include "../kselftest.h"
24#include "clone3_selftests.h"
25
26#define MAX_PID_NS_LEVEL 32
27
28static int pipe_1[2];
29static int pipe_2[2];
30
/*
 * Terminate the calling (child) process with exit status @ret.
 *
 * stdout and stderr are flushed explicitly first because _exit() is not
 * guaranteed to flush stdio buffers, and buffered test output would
 * otherwise be lost.
 */
static void child_exit(int ret)
{
	FILE *const streams[] = { stdout, stderr };
	size_t idx;

	for (idx = 0; idx < sizeof(streams) / sizeof(streams[0]); idx++)
		fflush(streams[idx]);

	_exit(ret);
}
37
38static int call_clone3_set_tid(pid_t *set_tid,
39			       size_t set_tid_size,
40			       int flags,
41			       int expected_pid,
42			       bool wait_for_it)
43{
44	int status;
45	pid_t pid = -1;
46
47	struct __clone_args args = {
48		.flags = flags,
49		.exit_signal = SIGCHLD,
50		.set_tid = ptr_to_u64(set_tid),
51		.set_tid_size = set_tid_size,
52	};
53
54	pid = sys_clone3(&args, sizeof(args));
55	if (pid < 0) {
56		ksft_print_msg("%s - Failed to create new process\n",
57			       strerror(errno));
58		return -errno;
59	}
60
61	if (pid == 0) {
62		int ret;
63		char tmp = 0;
64		int exit_code = EXIT_SUCCESS;
65
66		ksft_print_msg("I am the child, my PID is %d (expected %d)\n",
67			       getpid(), set_tid[0]);
68		if (wait_for_it) {
69			ksft_print_msg("[%d] Child is ready and waiting\n",
70				       getpid());
71
72			/* Signal the parent that the child is ready */
73			close(pipe_1[0]);
74			ret = write(pipe_1[1], &tmp, 1);
75			if (ret != 1) {
76				ksft_print_msg(
77					"Writing to pipe returned %d", ret);
78				exit_code = EXIT_FAILURE;
79			}
80			close(pipe_1[1]);
81			close(pipe_2[1]);
82			ret = read(pipe_2[0], &tmp, 1);
83			if (ret != 1) {
84				ksft_print_msg(
85					"Reading from pipe returned %d", ret);
86				exit_code = EXIT_FAILURE;
87			}
88			close(pipe_2[0]);
89		}
90
91		if (set_tid[0] != getpid())
92			child_exit(EXIT_FAILURE);
93		child_exit(exit_code);
94	}
95
96	if (expected_pid == 0 || expected_pid == pid) {
97		ksft_print_msg("I am the parent (%d). My child's pid is %d\n",
98			       getpid(), pid);
99	} else {
100		ksft_print_msg(
101			"Expected child pid %d does not match actual pid %d\n",
102			expected_pid, pid);
103		return -1;
104	}
105
106	if (waitpid(pid, &status, 0) < 0) {
107		ksft_print_msg("Child returned %s\n", strerror(errno));
108		return -errno;
109	}
110
111	if (!WIFEXITED(status))
112		return -1;
113
114	return WEXITSTATUS(status);
115}
116
/*
 * Run a single clone3() set_tid test case and record its result.
 *
 * @set_tid, @set_tid_size, @flags, @expected_pid and @wait_for_it are
 * forwarded unchanged to call_clone3_set_tid(). The case is reported as
 * passed when that call returns @expected and as failed otherwise.
 */
static void test_clone3_set_tid(pid_t *set_tid,
				size_t set_tid_size,
				int flags,
				int expected,
				int expected_pid,
				bool wait_for_it)
{
	int outcome;

	ksft_print_msg(
		"[%d] Trying clone3() with CLONE_SET_TID to %d and 0x%x\n",
		getpid(), set_tid[0], flags);

	outcome = call_clone3_set_tid(set_tid, set_tid_size, flags,
				      expected_pid, wait_for_it);

	ksft_print_msg(
		"[%d] clone3() with CLONE_SET_TID %d says :%d - expected %d\n",
		getpid(), set_tid[0], outcome, expected);

	if (outcome == expected) {
		ksft_test_result_pass(
			"[%d] Result (%d) matches expectation (%d)\n",
			getpid(), outcome, expected);
		return;
	}

	ksft_test_result_fail(
		"[%d] Result (%d) is different than expected (%d)\n",
		getpid(), outcome, expected);
}
143int main(int argc, char *argv[])
144{
145	FILE *f;
146	char buf;
147	char *line;
148	int status;
149	int ret = -1;
150	size_t len = 0;
151	int pid_max = 0;
152	uid_t uid = getuid();
153	char proc_path[100] = {0};
154	pid_t pid, ns1, ns2, ns3, ns_pid;
155	pid_t set_tid[MAX_PID_NS_LEVEL * 2];
156
157	ksft_print_header();
158	ksft_set_plan(29);
159	test_clone3_supported();
160
161	if (pipe(pipe_1) < 0 || pipe(pipe_2) < 0)
162		ksft_exit_fail_msg("pipe() failed\n");
163
164	f = fopen("/proc/sys/kernel/pid_max", "r");
165	if (f == NULL)
166		ksft_exit_fail_msg(
167			"%s - Could not open /proc/sys/kernel/pid_max\n",
168			strerror(errno));
169	fscanf(f, "%d", &pid_max);
170	fclose(f);
171	ksft_print_msg("/proc/sys/kernel/pid_max %d\n", pid_max);
172
173	/* Try invalid settings */
174	memset(&set_tid, 0, sizeof(set_tid));
175	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0);
176
177	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0);
178
179	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0,
180			-EINVAL, 0, 0);
181
182	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0);
183
184	/*
185	 * This can actually work if this test running in a MAX_PID_NS_LEVEL - 1
186	 * nested PID namespace.
187	 */
188	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
189
190	memset(&set_tid, 0xff, sizeof(set_tid));
191	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0);
192
193	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0);
194
195	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0,
196			-EINVAL, 0, 0);
197
198	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0);
199
200	/*
201	 * This can actually work if this test running in a MAX_PID_NS_LEVEL - 1
202	 * nested PID namespace.
203	 */
204	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
205
206	memset(&set_tid, 0, sizeof(set_tid));
207	/* Try with an invalid PID */
208	set_tid[0] = 0;
209	test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
210
211	set_tid[0] = -1;
212	test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
213
214	/* Claim that the set_tid array actually contains 2 elements. */
215	test_clone3_set_tid(set_tid, 2, 0, -EINVAL, 0, 0);
216
217	/* Try it in a new PID namespace */
218	if (uid == 0)
219		test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
220	else
221		ksft_test_result_skip("Clone3() with set_tid requires root\n");
222
223	/* Try with a valid PID (1) this should return -EEXIST. */
224	set_tid[0] = 1;
225	if (uid == 0)
226		test_clone3_set_tid(set_tid, 1, 0, -EEXIST, 0, 0);
227	else
228		ksft_test_result_skip("Clone3() with set_tid requires root\n");
229
230	/* Try it in a new PID namespace */
231	if (uid == 0)
232		test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, 0, 0, 0);
233	else
234		ksft_test_result_skip("Clone3() with set_tid requires root\n");
235
236	/* pid_max should fail everywhere */
237	set_tid[0] = pid_max;
238	test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
239
240	if (uid == 0)
241		test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
242	else
243		ksft_test_result_skip("Clone3() with set_tid requires root\n");
244
245	if (uid != 0) {
246		/*
247		 * All remaining tests require root. Tell the framework
248		 * that all those tests are skipped as non-root.
249		 */
250		ksft_cnt.ksft_xskip += ksft_plan - ksft_test_num();
251		goto out;
252	}
253
254	/* Find the current active PID */
255	pid = fork();
256	if (pid == 0) {
257		ksft_print_msg("Child has PID %d\n", getpid());
258		child_exit(EXIT_SUCCESS);
259	}
260	if (waitpid(pid, &status, 0) < 0)
261		ksft_exit_fail_msg("Waiting for child %d failed", pid);
262
263	/* After the child has finished, its PID should be free. */
264	set_tid[0] = pid;
265	test_clone3_set_tid(set_tid, 1, 0, 0, 0, 0);
266
267	/* This should fail as there is no PID 1 in that namespace */
268	test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
269
270	/*
271	 * Creating a process with PID 1 in the newly created most nested
272	 * PID namespace and PID 'pid' in the parent PID namespace. This
273	 * needs to work.
274	 */
275	set_tid[0] = 1;
276	set_tid[1] = pid;
277	test_clone3_set_tid(set_tid, 2, CLONE_NEWPID, 0, pid, 0);
278
279	ksft_print_msg("unshare PID namespace\n");
280	if (unshare(CLONE_NEWPID) == -1)
281		ksft_exit_fail_msg("unshare(CLONE_NEWPID) failed: %s\n",
282				strerror(errno));
283
284	set_tid[0] = pid;
285
286	/* This should fail as there is no PID 1 in that namespace */
287	test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
288
289	/* Let's create a PID 1 */
290	ns_pid = fork();
291	if (ns_pid == 0) {
292		/*
293		 * This and the next test cases check that all pid-s are
294		 * released on error paths.
295		 */
296		set_tid[0] = 43;
297		set_tid[1] = -1;
298		test_clone3_set_tid(set_tid, 2, 0, -EINVAL, 0, 0);
299
300		set_tid[0] = 43;
301		set_tid[1] = pid;
302		test_clone3_set_tid(set_tid, 2, 0, 0, 43, 0);
303
304		ksft_print_msg("Child in PID namespace has PID %d\n", getpid());
305		set_tid[0] = 2;
306		test_clone3_set_tid(set_tid, 1, 0, 0, 2, 0);
307
308		set_tid[0] = 1;
309		set_tid[1] = -1;
310		set_tid[2] = pid;
311		/* This should fail as there is invalid PID at level '1'. */
312		test_clone3_set_tid(set_tid, 3, CLONE_NEWPID, -EINVAL, 0, 0);
313
314		set_tid[0] = 1;
315		set_tid[1] = 42;
316		set_tid[2] = pid;
317		/*
318		 * This should fail as there are not enough active PID
319		 * namespaces. Again assuming this is running in the host's
320		 * PID namespace. Not yet nested.
321		 */
322		test_clone3_set_tid(set_tid, 4, CLONE_NEWPID, -EINVAL, 0, 0);
323
324		/*
325		 * This should work and from the parent we should see
326		 * something like 'NSpid:	pid	42	1'.
327		 */
328		test_clone3_set_tid(set_tid, 3, CLONE_NEWPID, 0, 42, true);
329
330		child_exit(ksft_cnt.ksft_fail);
331	}
332
333	close(pipe_1[1]);
334	close(pipe_2[0]);
335	while (read(pipe_1[0], &buf, 1) > 0) {
336		ksft_print_msg("[%d] Child is ready and waiting\n", getpid());
337		break;
338	}
339
340	snprintf(proc_path, sizeof(proc_path), "/proc/%d/status", pid);
341	f = fopen(proc_path, "r");
342	if (f == NULL)
343		ksft_exit_fail_msg(
344			"%s - Could not open %s\n",
345			strerror(errno), proc_path);
346
347	while (getline(&line, &len, f) != -1) {
348		if (strstr(line, "NSpid")) {
349			int i;
350
351			/* Verify that all generated PIDs are as expected. */
352			i = sscanf(line, "NSpid:\t%d\t%d\t%d",
353				   &ns3, &ns2, &ns1);
354			if (i != 3) {
355				ksft_print_msg(
356					"Unexpected 'NSPid:' entry: %s",
357					line);
358				ns1 = ns2 = ns3 = 0;
359			}
360			break;
361		}
362	}
363	fclose(f);
364	free(line);
365	close(pipe_2[0]);
366
367	/* Tell the clone3()'d child to finish. */
368	write(pipe_2[1], &buf, 1);
369	close(pipe_2[1]);
370
371	if (waitpid(ns_pid, &status, 0) < 0) {
372		ksft_print_msg("Child returned %s\n", strerror(errno));
373		ret = -errno;
374		goto out;
375	}
376
377	if (!WIFEXITED(status))
378		ksft_test_result_fail("Child error\n");
379
380	ksft_cnt.ksft_pass += 6 - (ksft_cnt.ksft_fail - WEXITSTATUS(status));
381	ksft_cnt.ksft_fail = WEXITSTATUS(status);
382
383	if (ns3 == pid && ns2 == 42 && ns1 == 1)
384		ksft_test_result_pass(
385			"PIDs in all namespaces as expected (%d,%d,%d)\n",
386			ns3, ns2, ns1);
387	else
388		ksft_test_result_fail(
389			"PIDs in all namespaces not as expected (%d,%d,%d)\n",
390			ns3, ns2, ns1);
391out:
392	ret = 0;
393
394	return !ret ? ksft_exit_pass() : ksft_exit_fail();
395}
396