1// SPDX-License-Identifier: GPL-2.0
2
3#define _GNU_SOURCE
4#include <errno.h>
5#include <fcntl.h>
6#include <limits.h>
7#include <linux/types.h>
8#include <poll.h>
9#include <sched.h>
10#include <signal.h>
11#include <stdio.h>
12#include <stdlib.h>
13#include <string.h>
14#include <syscall.h>
15#include <sys/prctl.h>
16#include <sys/wait.h>
17#include <unistd.h>
18#include <sys/socket.h>
19#include <linux/kcmp.h>
20
21#include "pidfd.h"
22#include "../kselftest_harness.h"
23
/*
 * Test-wide integer constants.
 *
 * UNKNOWN_FD:  an fd number that is assumed never to be open in the child;
 *              it drives the negative (EBADF) test case.
 * UID_NOBODY:  the uid the disable_ptrace test switches its effective uid
 *              to when it is started as root.
 */
enum {
	UNKNOWN_FD = 111,
	UID_NOBODY = 65535,
};
30
/*
 * Invoke the raw kcmp system call (__NR_kcmp) with the given arguments.
 *
 * All five arguments are forwarded unchanged. The value returned by
 * syscall() is handed back to the caller narrowed to int; on failure that
 * is -1 with errno set by syscall().
 */
static int sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1,
		    unsigned long idx2)
{
	long rc;

	rc = syscall(__NR_kcmp, pid1, pid2, type, idx1, idx2);

	return (int)rc;
}
36
/*
 * Main body of the forked child once its memfd exists.
 *
 * @sk:    the child's end of the socketpair shared with the parent
 * @memfd: the fd number that is reported to the parent
 *
 * Sequence:
 *   1. ask to be SIGKILLed when the parent dies,
 *   2. send the value of @memfd over @sk (this doubles as the "setup is
 *      complete" notification),
 *   3. serve single-byte commands until the peer closes the socket.
 *
 * The only command understood is 'P', which clears the dumpable flag (the
 * parent uses this to make the child un-ptraceable) and is acknowledged by
 * echoing the byte back.
 *
 * Returns 0 when recv() reports end-of-stream, -1 on any error or unknown
 * command (a diagnostic is written to stderr first).
 */
static int __child(int sk, int memfd)
{
	ssize_t nbytes;
	char cmd;

	/*
	 * Make sure a failing test run does not leave orphaned children
	 * behind.
	 */
	if (prctl(PR_SET_PDEATHSIG, SIGKILL)) {
		fprintf(stderr, "%s: Child could not set DEATHSIG\n",
			strerror(errno));
		return -1;
	}

	/* Report the fd number; the parent blocks on this during setup. */
	nbytes = send(sk, &memfd, sizeof(memfd), 0);
	if (nbytes != sizeof(memfd)) {
		fprintf(stderr, "%s: Child failed to send fd number\n",
			strerror(errno));
		return -1;
	}

	/*
	 * Fixture setup is done; the tests run from here on.
	 *
	 * Block until the parent either sends a command byte or closes its
	 * end of the socket (which happens at fixture teardown and tells us
	 * to exit).
	 */
	for (;;) {
		nbytes = recv(sk, &cmd, sizeof(cmd), 0);
		if (nbytes < 0) {
			fprintf(stderr, "%s: Child failed to read from socket\n",
				strerror(errno));
			return -1;
		}
		if (nbytes == 0)
			return 0;	/* peer closed the socket: clean exit */

		if (cmd != 'P') {
			fprintf(stderr, "Child received unknown command %c\n",
				cmd);
			return -1;
		}

		if (prctl(PR_SET_DUMPABLE, 0) < 0) {
			fprintf(stderr,
				"%s: Child failed to disable ptrace\n",
				strerror(errno));
			return -1;
		}

		/* Acknowledge by echoing the command byte back. */
		nbytes = send(sk, &cmd, sizeof(cmd), 0);
		if (nbytes != 1) {
			fprintf(stderr, "%s: Child failed to ack\n",
				strerror(errno));
			return -1;
		}
	}
}
98
/*
 * Entry point of the forked child.
 *
 * Creates a memfd named "test", runs __child() with it, and releases both
 * the memfd and @sk before returning.
 *
 * Returns the result of __child(), or -1 if the memfd could not be created.
 */
static int child(int sk)
{
	int ret = -1;
	int memfd = sys_memfd_create("test", 0);

	if (memfd >= 0) {
		ret = __child(sk, memfd);
		close(memfd);
	} else {
		fprintf(stderr, "%s: Child could not create memfd\n",
			strerror(errno));
	}

	/* Our end of the socketpair is closed on every path. */
	close(sk);
	return ret;
}
116
/*
 * Per-test state for the "child" fixture: one forked helper process plus
 * the handles the parent uses to talk to it and inspect it.
 */
FIXTURE(child)
{
	/* Number of the fd, as seen inside the child, that tests fetch. */
	int remote_fd;

	/* Process ID of the child whose fds are being fetched. */
	pid_t pid;

	/* pidfd referring to that child. */
	int pidfd;

	/*
	 * Parent's end of the socketpair shared with the child. Closing it
	 * makes the child exit.
	 */
	int sk;

	/* When true, teardown does not check the child's wait result. */
	bool ignore_child_result;
};
135
136FIXTURE_SETUP(child)
137{
138	int ret, sk_pair[2];
139
140	ASSERT_EQ(0, socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair)) {
141		TH_LOG("%s: failed to create socketpair", strerror(errno));
142	}
143	self->sk = sk_pair[0];
144
145	self->pid = fork();
146	ASSERT_GE(self->pid, 0);
147
148	if (self->pid == 0) {
149		close(sk_pair[0]);
150		if (child(sk_pair[1]))
151			_exit(EXIT_FAILURE);
152		_exit(EXIT_SUCCESS);
153	}
154
155	close(sk_pair[1]);
156
157	self->pidfd = sys_pidfd_open(self->pid, 0);
158	ASSERT_GE(self->pidfd, 0);
159
160	/*
161	 * Wait for the child to complete setup. It'll send the remote memfd's
162	 * number when ready.
163	 */
164	ret = recv(sk_pair[0], &self->remote_fd, sizeof(self->remote_fd), 0);
165	ASSERT_EQ(sizeof(self->remote_fd), ret);
166}
167
168FIXTURE_TEARDOWN(child)
169{
170	int ret;
171
172	EXPECT_EQ(0, close(self->pidfd));
173	EXPECT_EQ(0, close(self->sk));
174
175	ret = wait_for_pid(self->pid);
176	if (!self->ignore_child_result)
177		EXPECT_EQ(0, ret);
178}
179
180TEST_F(child, disable_ptrace)
181{
182	int uid, fd;
183	char c;
184
185	/*
186	 * Turn into nobody if we're root, to avoid CAP_SYS_PTRACE
187	 *
188	 * The tests should run in their own process, so even this test fails,
189	 * it shouldn't result in subsequent tests failing.
190	 */
191	uid = getuid();
192	if (uid == 0)
193		ASSERT_EQ(0, seteuid(UID_NOBODY));
194
195	ASSERT_EQ(1, send(self->sk, "P", 1, 0));
196	ASSERT_EQ(1, recv(self->sk, &c, 1, 0));
197
198	fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0);
199	EXPECT_EQ(-1, fd);
200	EXPECT_EQ(EPERM, errno);
201
202	if (uid == 0)
203		ASSERT_EQ(0, seteuid(0));
204}
205
206TEST_F(child, fetch_fd)
207{
208	int fd, ret;
209
210	fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0);
211	ASSERT_GE(fd, 0);
212
213	ret = sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd);
214	if (ret < 0 && errno == ENOSYS)
215		SKIP(return, "kcmp() syscall not supported");
216	EXPECT_EQ(ret, 0);
217
218	ret = fcntl(fd, F_GETFD);
219	ASSERT_GE(ret, 0);
220	EXPECT_GE(ret & FD_CLOEXEC, 0);
221
222	close(fd);
223}
224
225TEST_F(child, test_unknown_fd)
226{
227	int fd;
228
229	fd = sys_pidfd_getfd(self->pidfd, UNKNOWN_FD, 0);
230	EXPECT_EQ(-1, fd) {
231		TH_LOG("getfd succeeded while fetching unknown fd");
232	};
233	EXPECT_EQ(EBADF, errno) {
234		TH_LOG("%s: getfd did not get EBADF", strerror(errno));
235	}
236}
237
/*
 * Calling pidfd_getfd() with a non-zero flags argument (1 here) is expected
 * to fail with EINVAL. The pidfd and fd arguments are both passed as 0.
 */
TEST(flags_set)
{
	/* Abort if the call unexpectedly succeeds: errno would be stale. */
	ASSERT_EQ(-1, sys_pidfd_getfd(0, 0, 1));

	EXPECT_EQ(errno, EINVAL);
}
243
244TEST_F(child, no_strange_EBADF)
245{
246	struct pollfd fds;
247
248	self->ignore_child_result = true;
249
250	fds.fd = self->pidfd;
251	fds.events = POLLIN;
252
253	ASSERT_EQ(kill(self->pid, SIGKILL), 0);
254	ASSERT_EQ(poll(&fds, 1, 5000), 1);
255
256	/*
257	 * It used to be that pidfd_getfd() could race with the exiting thread
258	 * between exit_files() and release_task(), and get a non-null task
259	 * with a NULL files struct, and you'd get EBADF, which was slightly
260	 * confusing.
261	 */
262	errno = 0;
263	EXPECT_EQ(sys_pidfd_getfd(self->pidfd, self->remote_fd, 0), -1);
264	EXPECT_EQ(errno, ESRCH);
265}
266
267#if __NR_pidfd_getfd == -1
268int main(void)
269{
270	fprintf(stderr, "__NR_pidfd_getfd undefined. The pidfd_getfd syscall is unavailable. Test aborting\n");
271	return KSFT_SKIP;
272}
273#else
274TEST_HARNESS_MAIN
275#endif
276