1// SPDX-License-Identifier: GPL-2.0
2#define _GNU_SOURCE
3#include <sched.h>
4#include <stdio.h>
5#include <errno.h>
6#include <string.h>
7#include <sys/stat.h>
8#include <sys/types.h>
9#include <sys/mount.h>
10#include <sys/wait.h>
11#include <stdlib.h>
12#include <unistd.h>
13#include <fcntl.h>
14#include <stdbool.h>
15#include <stdarg.h>
16#include <sys/syscall.h>
17
18#include "../kselftest_harness.h"
19
/*
 * Fallback definitions for constants used by this test. Each one is only
 * defined when the included headers did not already provide it.
 */

/* unshare()/setns() namespace selectors */
#if !defined(CLONE_NEWNS)
#define CLONE_NEWNS			0x00020000
#endif

#if !defined(CLONE_NEWUSER)
#define CLONE_NEWUSER			0x10000000
#endif

/* mount() propagation flags */
#if !defined(MS_SHARED)
#define MS_SHARED			(1 << 20)
#endif

#if !defined(MS_PRIVATE)
#define MS_PRIVATE			(1 << 18)
#endif

/* move_mount() flags */
#if !defined(MOVE_MOUNT_SET_GROUP)
#define MOVE_MOUNT_SET_GROUP		0x00000100
#endif

#if !defined(MOVE_MOUNT_F_EMPTY_PATH)
#define MOVE_MOUNT_F_EMPTY_PATH		0x00000004
#endif

#if !defined(MOVE_MOUNT_T_EMPTY_PATH)
#define MOVE_MOUNT_T_EMPTY_PATH		0x00000040
#endif
47
/*
 * write() wrapper that transparently restarts the call when it fails
 * with EINTR.
 *
 * Returns whatever the final write() returned: the number of bytes written
 * (possibly fewer than @count), or -1 with errno set for any error other
 * than EINTR.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t written = write(fd, buf, count);

		/* Anything except an interrupted call is final. */
		if (written >= 0 || errno != EINTR)
			return written;
	}
}
58
59static int write_file(const char *path, const void *buf, size_t count)
60{
61	int fd;
62	ssize_t ret;
63
64	fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
65	if (fd < 0)
66		return -1;
67
68	ret = write_nointr(fd, buf, count);
69	close(fd);
70	if (ret < 0 || (size_t)ret != count)
71		return -1;
72
73	return 0;
74}
75
76static int create_and_enter_userns(void)
77{
78	uid_t uid;
79	gid_t gid;
80	char map[100];
81
82	uid = getuid();
83	gid = getgid();
84
85	if (unshare(CLONE_NEWUSER))
86		return -1;
87
88	if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
89	    errno != ENOENT)
90		return -1;
91
92	snprintf(map, sizeof(map), "0 %d 1", uid);
93	if (write_file("/proc/self/uid_map", map, strlen(map)))
94		return -1;
95
96
97	snprintf(map, sizeof(map), "0 %d 1", gid);
98	if (write_file("/proc/self/gid_map", map, strlen(map)))
99		return -1;
100
101	if (setgid(0))
102		return -1;
103
104	if (setuid(0))
105		return -1;
106
107	return 0;
108}
109
110static int prepare_unpriv_mountns(void)
111{
112	if (create_and_enter_userns())
113		return -1;
114
115	if (unshare(CLONE_NEWNS))
116		return -1;
117
118	if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
119		return -1;
120
121	return 0;
122}
123
/*
 * Skip @nfields fields in @src, where every single space or tab character
 * ends a field.
 *
 * Returns a pointer to the first character after the @nfields-th
 * separator, or a pointer to the terminating NUL of @src if the string
 * runs out first. Never returns NULL and does not modify @src.
 */
static char *get_field(char *src, int nfields)
{
	char *cursor = src;

	for (; nfields > 0; nfields--) {
		/* Jump to the next separator (or the end of the string). */
		cursor += strcspn(cursor, " \t");
		if (*cursor == '\0')
			break;

		/* Step over exactly one separator character. */
		cursor++;
	}

	return cursor;
}
141
/*
 * Terminate @word in place at its first space or tab. If there is none,
 * the existing terminating NUL is simply rewritten.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
148
/*
 * Check /proc/self/mountinfo for a shared mount at @path.
 *
 * For each line, the whitespace-separated field at index 4 (0-based) is
 * compared with @path; per the mountinfo format this is the mount point.
 * On a match, the field at index 6 is searched for "shared:".
 *
 * Returns true if a matching line carries "shared:" in that field, false
 * otherwise (including when mountinfo cannot be opened).
 */
static bool is_shared_mount(const char *path)
{
	bool shared = false;
	size_t len = 0;
	char *line = NULL;
	FILE *f;

	f = fopen("/proc/self/mountinfo", "re");
	if (!f)
		return false;

	while (getline(&line, &len, f) != -1) {
		char *opts, *target;

		/*
		 * get_field() never returns NULL; on a short line it points
		 * at the terminating NUL, which simply fails the strcmp().
		 */
		target = get_field(line, 4);

		/* Locate opts before target is NUL-terminated in place. */
		opts = get_field(target, 2);

		null_endofword(target);
		if (strcmp(target, path) != 0)
			continue;

		null_endofword(opts);
		if (strstr(opts, "shared:")) {
			shared = true;
			break;
		}
	}

	/* Single exit: release the getline() buffer and stream on all paths. */
	free(line);
	fclose(f);

	return shared;
}
185
/*
 * Attempt to de-conflict with the selftests tree: when the harness does
 * not provide SKIP(), fall back to XFAIL().
 */
#if !defined(SKIP)
#define SKIP(s, ...)	XFAIL(s, ##__VA_ARGS__)
#endif

/* Scratch mount points used by move_mount_set_group_supported(). */
#define SET_GROUP_FROM	"/tmp/move_mount_set_group_supported_from"
#define SET_GROUP_TO	"/tmp/move_mount_set_group_supported_to"
193
194static bool move_mount_set_group_supported(void)
195{
196	int ret;
197
198	if (mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
199		  "size=100000,mode=700"))
200		return -1;
201
202	if (mount(NULL, "/tmp", NULL, MS_PRIVATE, 0))
203		return -1;
204
205	if (mkdir(SET_GROUP_FROM, 0777))
206		return -1;
207
208	if (mkdir(SET_GROUP_TO, 0777))
209		return -1;
210
211	if (mount("testing", SET_GROUP_FROM, "tmpfs", MS_NOATIME | MS_NODEV,
212		  "size=100000,mode=700"))
213		return -1;
214
215	if (mount(SET_GROUP_FROM, SET_GROUP_TO, NULL, MS_BIND, NULL))
216		return -1;
217
218	if (mount(NULL, SET_GROUP_FROM, NULL, MS_SHARED, 0))
219		return -1;
220
221	ret = syscall(__NR_move_mount, AT_FDCWD, SET_GROUP_FROM,
222		      AT_FDCWD, SET_GROUP_TO, MOVE_MOUNT_SET_GROUP);
223	umount2("/tmp", MNT_DETACH);
224
225	return ret >= 0;
226}
227
228FIXTURE(move_mount_set_group) {
229};
230
231#define SET_GROUP_A "/tmp/A"
232
233FIXTURE_SETUP(move_mount_set_group)
234{
235	bool ret;
236
237	ASSERT_EQ(prepare_unpriv_mountns(), 0);
238
239	ret = move_mount_set_group_supported();
240	ASSERT_GE(ret, 0);
241	if (!ret)
242		SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
243
244	umount2("/tmp", MNT_DETACH);
245
246	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
247			"size=100000,mode=700"), 0);
248
249	ASSERT_EQ(mkdir(SET_GROUP_A, 0777), 0);
250
251	ASSERT_EQ(mount("testing", SET_GROUP_A, "tmpfs", MS_NOATIME | MS_NODEV,
252			"size=100000,mode=700"), 0);
253}
254
255FIXTURE_TEARDOWN(move_mount_set_group)
256{
257	bool ret;
258
259	ret = move_mount_set_group_supported();
260	ASSERT_GE(ret, 0);
261	if (!ret)
262		SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
263
264	umount2("/tmp", MNT_DETACH);
265}
266
#define __STACK_SIZE (8 * 1024 * 1024)
/*
 * Run @fn(@arg) in a child created with clone() on a freshly allocated
 * __STACK_SIZE-byte stack. SIGCHLD is always OR-ed into @flags.
 *
 * Returns the value returned by clone() (the child's pid in the parent,
 * -1 with errno set on failure), or -ENOMEM if the stack could not be
 * allocated.
 *
 * On success the stack is not freed here. NOTE(review): presumably this is
 * because the child may still be running on it (e.g. with CLONE_VM);
 * confirm before adding a free on that path.
 */
static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
{
	char *stack;	/* char *, so the pointer arithmetic below is standard C */
	pid_t pid;

	stack = malloc(__STACK_SIZE);
	if (!stack)
		return -ENOMEM;

#ifdef __ia64__
	pid = __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#else
	/* The non-ia64 branch passes the top of the allocation to clone(). */
	pid = clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#endif
	if (pid < 0) {
		/* No child was created: release the stack, keep clone()'s errno. */
		int saved_errno = errno;

		free(stack);
		errno = saved_errno;
	}

	return pid;
}
282
/*
 * Wait for child @pid, retrying waitpid() whenever it is interrupted
 * (EINTR).
 *
 * Returns the child's exit status if it exited normally, or -1 if
 * waitpid() failed for another reason or the child did not exit normally.
 */
static int wait_for_pid(pid_t pid)
{
	int wstatus;

	while (waitpid(pid, &wstatus, 0) == -1) {
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : -1;
}
301
302struct child_args {
303	int unsfd;
304	int mntnsfd;
305	bool shared;
306	int mntfd;
307};
308
309static int get_nestedns_mount_cb(void *data)
310{
311	struct child_args *ca = (struct child_args *)data;
312	int ret;
313
314	ret = prepare_unpriv_mountns();
315	if (ret)
316		return 1;
317
318	if (ca->shared) {
319		ret = mount(NULL, SET_GROUP_A, NULL, MS_SHARED, 0);
320		if (ret)
321			return 1;
322	}
323
324	ret = open("/proc/self/ns/user", O_RDONLY);
325	if (ret < 0)
326		return 1;
327	ca->unsfd = ret;
328
329	ret = open("/proc/self/ns/mnt", O_RDONLY);
330	if (ret < 0)
331		return 1;
332	ca->mntnsfd = ret;
333
334	ret = open(SET_GROUP_A, O_RDONLY);
335	if (ret < 0)
336		return 1;
337	ca->mntfd = ret;
338
339	return 0;
340}
341
342TEST_F(move_mount_set_group, complex_sharing_copying)
343{
344	struct child_args ca_from = {
345		.shared = true,
346	};
347	struct child_args ca_to = {
348		.shared = false,
349	};
350	pid_t pid;
351	bool ret;
352
353	ret = move_mount_set_group_supported();
354	ASSERT_GE(ret, 0);
355	if (!ret)
356		SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
357
358	pid = do_clone(get_nestedns_mount_cb, (void *)&ca_from, CLONE_VFORK |
359		       CLONE_VM | CLONE_FILES); ASSERT_GT(pid, 0);
360	ASSERT_EQ(wait_for_pid(pid), 0);
361
362	pid = do_clone(get_nestedns_mount_cb, (void *)&ca_to, CLONE_VFORK |
363		       CLONE_VM | CLONE_FILES); ASSERT_GT(pid, 0);
364	ASSERT_EQ(wait_for_pid(pid), 0);
365
366	ASSERT_EQ(syscall(__NR_move_mount, ca_from.mntfd, "",
367			  ca_to.mntfd, "", MOVE_MOUNT_SET_GROUP
368			  | MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH),
369		  0);
370
371	ASSERT_EQ(setns(ca_to.mntnsfd, CLONE_NEWNS), 0);
372	ASSERT_EQ(is_shared_mount(SET_GROUP_A), 1);
373}
374
375TEST_HARNESS_MAIN
376