1// SPDX-License-Identifier: GPL-2.0
2#define _GNU_SOURCE
3#include <sched.h>
4#include <stdio.h>
5#include <errno.h>
6#include <pthread.h>
7#include <string.h>
8#include <sys/stat.h>
9#include <sys/types.h>
10#include <sys/mount.h>
11#include <sys/wait.h>
12#include <sys/vfs.h>
13#include <sys/statvfs.h>
14#include <sys/sysinfo.h>
15#include <stdlib.h>
16#include <unistd.h>
17#include <fcntl.h>
18#include <grp.h>
19#include <stdbool.h>
20#include <stdarg.h>
21#include <linux/mount.h>
22
23#include "../kselftest_harness.h"
24
/*
 * Fallback definitions for constants that the included system headers may
 * not provide. Each one is guarded so that a header-supplied definition
 * always takes precedence.
 */

/* Namespace flags passed to unshare() in this file. */
#ifndef CLONE_NEWNS
#define CLONE_NEWNS 0x00020000
#endif

#ifndef CLONE_NEWUSER
#define CLONE_NEWUSER 0x10000000
#endif

/* MS_* flags passed to mount() and compared against read_mnt_flags(). */
#ifndef MS_REC
#define MS_REC 16384
#endif

#ifndef MS_RELATIME
#define MS_RELATIME (1 << 21)
#endif

#ifndef MS_STRICTATIME
#define MS_STRICTATIME (1 << 24)
#endif

/* MOUNT_ATTR_* bits stored in struct mount_attr's attr_set / attr_clr. */
#ifndef MOUNT_ATTR_RDONLY
#define MOUNT_ATTR_RDONLY 0x00000001
#endif

#ifndef MOUNT_ATTR_NOSUID
#define MOUNT_ATTR_NOSUID 0x00000002
#endif

#ifndef MOUNT_ATTR_NOEXEC
#define MOUNT_ATTR_NOEXEC 0x00000008
#endif

#ifndef MOUNT_ATTR_NODIRATIME
#define MOUNT_ATTR_NODIRATIME 0x00000080
#endif

/*
 * Mask covering the atime setting. The three atime values below
 * (0x00, 0x10, 0x20) all lie inside this mask; RELATIME is the all-zero
 * value, so "setting" it contributes no bits to attr_set.
 */
#ifndef MOUNT_ATTR__ATIME
#define MOUNT_ATTR__ATIME 0x00000070
#endif

#ifndef MOUNT_ATTR_RELATIME
#define MOUNT_ATTR_RELATIME 0x00000000
#endif

#ifndef MOUNT_ATTR_NOATIME
#define MOUNT_ATTR_NOATIME 0x00000010
#endif

#ifndef MOUNT_ATTR_STRICTATIME
#define MOUNT_ATTR_STRICTATIME 0x00000020
#endif

/* Flag passed to sys_mount_setattr() by the tests that act on a whole tree. */
#ifndef AT_RECURSIVE
#define AT_RECURSIVE 0x8000
#endif

/* Propagation value the tests place in struct mount_attr's propagation. */
#ifndef MS_SHARED
#define MS_SHARED (1 << 20)
#endif
84
/* Upper bound on the number of threads started by the multi_threaded test. */
#define DEFAULT_THREADS 4
/*
 * Round-trip an int through a void pointer (via intptr_t) so a thread can
 * hand its status to pthread_exit() and the joiner can read it back.
 */
#define ptr_to_int(p) ((int)((intptr_t)(p)))
#define int_to_ptr(u) ((void *)((intptr_t)(u)))
88
/*
 * Fallback syscall number for mount_setattr when the headers do not define
 * __NR_mount_setattr. The generic number is 442; alpha uses 552, MIPS adds
 * a per-ABI offset (4000/6000/5000) and ia64 adds 1024.
 *
 * Note: when _MIPS_SIM is defined but matches none of the three ABIs
 * checked below, the macro is left undefined.
 */
#ifndef __NR_mount_setattr
	#if defined __alpha__
		#define __NR_mount_setattr 552
	#elif defined _MIPS_SIM
		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
			#define __NR_mount_setattr (442 + 4000)
		#endif
		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
			#define __NR_mount_setattr (442 + 6000)
		#endif
		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
			#define __NR_mount_setattr (442 + 5000)
		#endif
	#elif defined __ia64__
		#define __NR_mount_setattr (442 + 1024)
	#else
		#define __NR_mount_setattr 442
	#endif
#endif
108
/*
 * Fallback syscall number for open_tree when the headers do not define
 * __NR_open_tree. The generic number is 428; alpha uses 538, the MIPS
 * values are 428 plus the per-ABI base (4000/6000/5000) written out in
 * full, and ia64 adds 1024.
 *
 * Note: when _MIPS_SIM is defined but matches none of the three ABIs
 * checked below, the macro is left undefined.
 */
#ifndef __NR_open_tree
	#if defined __alpha__
		#define __NR_open_tree 538
	#elif defined _MIPS_SIM
		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
			#define __NR_open_tree 4428
		#endif
		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
			#define __NR_open_tree 6428
		#endif
		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
			#define __NR_open_tree 5428
		#endif
	#elif defined __ia64__
		#define __NR_open_tree (428 + 1024)
	#else
		#define __NR_open_tree 428
	#endif
#endif
128
/*
 * Fallbacks for two further attr_set bits: MOUNT_ATTR_IDMAP is the one the
 * mount_setattr_idmapped tests combine with userns_fd.
 */
#ifndef MOUNT_ATTR_IDMAP
#define MOUNT_ATTR_IDMAP 0x00100000
#endif

#ifndef MOUNT_ATTR_NOSYMFOLLOW
#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000
#endif
136
/*
 * Invoke the mount_setattr system call directly through syscall(), using
 * the number in __NR_mount_setattr.
 *
 * @dfd, @path, @flags: forwarded unchanged as the first three arguments.
 * @attr:               attribute structure handed to the kernel.
 * @size:               size in bytes the caller claims for @attr.
 *
 * Returns whatever syscall() returns, narrowed to int.
 */
static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
				    struct mount_attr *attr, size_t size)
{
	long rc = syscall(__NR_mount_setattr, dfd, path, flags, attr, size);

	return (int)rc;
}
142
/*
 * Fallbacks for the open_tree() flags used by sys_open_tree() callers.
 * OPEN_TREE_CLOEXEC shares its value with O_CLOEXEC.
 *
 * AT_RECURSIVE is not repeated here: it is already guaranteed by the
 * guarded definition earlier in this file, so a second guard was dead.
 */
#ifndef OPEN_TREE_CLONE
#define OPEN_TREE_CLONE 1
#endif

#ifndef OPEN_TREE_CLOEXEC
#define OPEN_TREE_CLOEXEC O_CLOEXEC
#endif
154
/*
 * Invoke the open_tree system call directly through syscall(), using the
 * number in __NR_open_tree. All three arguments are forwarded unchanged.
 *
 * Returns whatever syscall() returns, narrowed to int.
 */
static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
{
	long rc = syscall(__NR_open_tree, dfd, filename, flags);

	return (int)rc;
}
159
/*
 * write() wrapper that transparently restarts the call when it fails with
 * EINTR.
 *
 * Returns the result of the first write() that either succeeds or fails
 * with an errno other than EINTR. Short writes are returned as-is; the
 * remainder is not retried.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t written = write(fd, buf, count);

		if (written >= 0 || errno != EINTR)
			return written;
	}
}
170
171static int write_file(const char *path, const void *buf, size_t count)
172{
173	int fd;
174	ssize_t ret;
175
176	fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
177	if (fd < 0)
178		return -1;
179
180	ret = write_nointr(fd, buf, count);
181	close(fd);
182	if (ret < 0 || (size_t)ret != count)
183		return -1;
184
185	return 0;
186}
187
188static int create_and_enter_userns(void)
189{
190	uid_t uid;
191	gid_t gid;
192	char map[100];
193
194	uid = getuid();
195	gid = getgid();
196
197	if (unshare(CLONE_NEWUSER))
198		return -1;
199
200	if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
201	    errno != ENOENT)
202		return -1;
203
204	snprintf(map, sizeof(map), "0 %d 1", uid);
205	if (write_file("/proc/self/uid_map", map, strlen(map)))
206		return -1;
207
208
209	snprintf(map, sizeof(map), "0 %d 1", gid);
210	if (write_file("/proc/self/gid_map", map, strlen(map)))
211		return -1;
212
213	if (setgid(0))
214		return -1;
215
216	if (setuid(0))
217		return -1;
218
219	return 0;
220}
221
222static int prepare_unpriv_mountns(void)
223{
224	if (create_and_enter_userns())
225		return -1;
226
227	if (unshare(CLONE_NEWNS))
228		return -1;
229
230	if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
231		return -1;
232
233	return 0;
234}
235
/* Fallback for the statvfs f_flag bit that read_mnt_flags() checks. */
#ifndef ST_NOSYMFOLLOW
#define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */
#endif
239
240static int read_mnt_flags(const char *path)
241{
242	int ret;
243	struct statvfs stat;
244	unsigned int mnt_flags;
245
246	ret = statvfs(path, &stat);
247	if (ret != 0)
248		return -EINVAL;
249
250	if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC |
251			    ST_NOATIME | ST_NODIRATIME | ST_RELATIME |
252			    ST_SYNCHRONOUS | ST_MANDLOCK | ST_NOSYMFOLLOW))
253		return -EINVAL;
254
255	mnt_flags = 0;
256	if (stat.f_flag & ST_RDONLY)
257		mnt_flags |= MS_RDONLY;
258	if (stat.f_flag & ST_NOSUID)
259		mnt_flags |= MS_NOSUID;
260	if (stat.f_flag & ST_NODEV)
261		mnt_flags |= MS_NODEV;
262	if (stat.f_flag & ST_NOEXEC)
263		mnt_flags |= MS_NOEXEC;
264	if (stat.f_flag & ST_NOATIME)
265		mnt_flags |= MS_NOATIME;
266	if (stat.f_flag & ST_NODIRATIME)
267		mnt_flags |= MS_NODIRATIME;
268	if (stat.f_flag & ST_RELATIME)
269		mnt_flags |= MS_RELATIME;
270	if (stat.f_flag & ST_SYNCHRONOUS)
271		mnt_flags |= MS_SYNCHRONOUS;
272	if (stat.f_flag & ST_MANDLOCK)
273		mnt_flags |= ST_MANDLOCK;
274	if (stat.f_flag & ST_NOSYMFOLLOW)
275		mnt_flags |= ST_NOSYMFOLLOW;
276
277	return mnt_flags;
278}
279
/*
 * Advance past @nfields space- or tab-terminated fields in @src.
 *
 * Each step skips to the next ' ' or '\t' and then one character beyond
 * it, so consecutive separators count as empty fields. If the string ends
 * first, the pointer to its terminating NUL is returned. The result is
 * never NULL.
 */
static char *get_field(char *src, int nfields)
{
	char *cursor = src;
	int remaining;

	for (remaining = nfields; remaining > 0; remaining--) {
		cursor += strcspn(cursor, " \t");
		if (*cursor == '\0')
			break;

		cursor++;
	}

	return cursor;
}
297
/*
 * Truncate @word in place at its first space or tab. When neither occurs,
 * the existing terminator is simply rewritten.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
304
/*
 * Tell whether /proc/self/mountinfo lists a mount at @path whose field two
 * positions after the mount point contains "shared:".
 *
 * Every line is scanned: field index 4 is compared against @path, and for
 * matching lines the field two further along is searched for "shared:".
 *
 * Returns true on the first such match; false if none is found or the file
 * cannot be opened. The line buffer and the stream are released on every
 * path, including the early match.
 */
static bool is_shared_mount(const char *path)
{
	size_t len = 0;
	char *line = NULL;
	FILE *f = NULL;
	bool shared = false;

	f = fopen("/proc/self/mountinfo", "re");
	if (!f)
		return false;

	while (getline(&line, &len, f) != -1) {
		char *opts, *target;

		target = get_field(line, 4);
		if (!target)
			continue;

		/* Locate opts before target is NUL-terminated below. */
		opts = get_field(target, 2);
		if (!opts)
			continue;

		null_endofword(target);

		if (strcmp(target, path) != 0)
			continue;

		null_endofword(opts);
		if (strstr(opts, "shared:")) {
			/* Leave through the common cleanup instead of returning. */
			shared = true;
			break;
		}
	}

	free(line);
	fclose(f);

	return shared;
}
341
342static void *mount_setattr_thread(void *data)
343{
344	struct mount_attr attr = {
345		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
346		.attr_clr	= 0,
347		.propagation	= MS_SHARED,
348	};
349
350	if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)))
351		pthread_exit(int_to_ptr(-1));
352
353	pthread_exit(int_to_ptr(0));
354}
355
/*
 * Attempt to de-conflict with the selftests tree: when the harness header
 * does not provide SKIP(), map it onto XFAIL() with the same arguments.
 */
#ifndef SKIP
#define SKIP(s, ...)	XFAIL(s, ##__VA_ARGS__)
#endif
360
361static bool mount_setattr_supported(void)
362{
363	int ret;
364
365	ret = sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0);
366	if (ret < 0 && errno == ENOSYS)
367		return false;
368
369	return true;
370}
371
/* Fixture for the non-idmapped tests; it carries no per-test data. */
FIXTURE(mount_setattr) {
};

/* Regular file and symlink to it, both created by the fixture setup. */
#define NOSYMFOLLOW_TARGET "/mnt/A/AA/data"
#define NOSYMFOLLOW_SYMLINK "/mnt/A/AA/symlink"
377
378FIXTURE_SETUP(mount_setattr)
379{
380	int fd = -EBADF;
381
382	if (!mount_setattr_supported())
383		SKIP(return, "mount_setattr syscall not supported");
384
385	ASSERT_EQ(prepare_unpriv_mountns(), 0);
386
387	(void)umount2("/mnt", MNT_DETACH);
388	(void)umount2("/tmp", MNT_DETACH);
389
390	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
391			"size=100000,mode=700"), 0);
392
393	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
394
395	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
396			"size=100000,mode=700"), 0);
397
398	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
399
400	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
401			"size=100000,mode=700"), 0);
402
403	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
404			"size=100000,mode=700"), 0);
405
406	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
407
408	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
409			"size=100000,mode=700"), 0);
410
411	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
412
413	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
414
415	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
416
417	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
418			MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
419
420	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
421
422	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
423			MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
424
425	fd = creat(NOSYMFOLLOW_TARGET, O_RDWR | O_CLOEXEC);
426	ASSERT_GT(fd, 0);
427	ASSERT_EQ(symlink(NOSYMFOLLOW_TARGET, NOSYMFOLLOW_SYMLINK), 0);
428	ASSERT_EQ(close(fd), 0);
429}
430
431FIXTURE_TEARDOWN(mount_setattr)
432{
433	if (!mount_setattr_supported())
434		SKIP(return, "mount_setattr syscall not supported");
435
436	(void)umount2("/mnt/A", MNT_DETACH);
437	(void)umount2("/tmp", MNT_DETACH);
438}
439
440TEST_F(mount_setattr, invalid_attributes)
441{
442	struct mount_attr invalid_attr = {
443		.attr_set = (1U << 31),
444	};
445
446	if (!mount_setattr_supported())
447		SKIP(return, "mount_setattr syscall not supported");
448
449	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
450				    sizeof(invalid_attr)), 0);
451
452	invalid_attr.attr_set	= 0;
453	invalid_attr.attr_clr	= (1U << 31);
454	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
455				    sizeof(invalid_attr)), 0);
456
457	invalid_attr.attr_clr		= 0;
458	invalid_attr.propagation	= (1U << 31);
459	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
460				    sizeof(invalid_attr)), 0);
461
462	invalid_attr.attr_set		= (1U << 31);
463	invalid_attr.attr_clr		= (1U << 31);
464	invalid_attr.propagation	= (1U << 31);
465	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
466				    sizeof(invalid_attr)), 0);
467
468	ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr,
469				    sizeof(invalid_attr)), 0);
470}
471
/*
 * Exercise the size argument of mount_setattr():
 *  - a NULL attribute pointer is expected to yield EFAULT,
 *  - a pointer to a short string, a zero size and a half-sized struct are
 *    each expected to yield EINVAL,
 *  - a buffer three times the size of struct mount_attr is accepted while
 *    everything past the first struct is zero, rejected once a trailing
 *    field is non-zero, and accepted again (and applied recursively) when
 *    only the leading struct carries MOUNT_ATTR_RDONLY.
 */
TEST_F(mount_setattr, extensibility)
{
	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
	char *s = "dummy";
	struct mount_attr invalid_attr = {};
	/* Oversized argument: three mount_attr structs laid out back to back. */
	struct mount_attr_large {
		struct mount_attr attr1;
		struct mount_attr attr2;
		struct mount_attr attr3;
	} large_attr = {};

	if (!mount_setattr_supported())
		SKIP(return, "mount_setattr syscall not supported");

	old_flags = read_mnt_flags("/mnt/A");
	ASSERT_GT(old_flags, 0);

	/* NULL attribute pointer with a plausible size. */
	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL,
				    sizeof(invalid_attr)), 0);
	ASSERT_EQ(errno, EFAULT);

	/* Pointer to unrelated data (a string literal). */
	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s,
				    sizeof(invalid_attr)), 0);
	ASSERT_EQ(errno, EINVAL);

	/* Valid pointer, size of zero. */
	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0);
	ASSERT_EQ(errno, EINVAL);

	/* Valid pointer, half the struct size (this probe is issued twice). */
	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
				    sizeof(invalid_attr) / 2), 0);
	ASSERT_EQ(errno, EINVAL);

	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
				    sizeof(invalid_attr) / 2), 0);
	ASSERT_EQ(errno, EINVAL);

	/* Oversized but entirely zero: must be accepted. */
	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
				    (void *)&large_attr, sizeof(large_attr)), 0);

	/* Non-zero data beyond the first struct: must be rejected. */
	large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY;
	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
				    (void *)&large_attr, sizeof(large_attr)), 0);

	/* Request only in the leading struct, zero tail: accepted and applied. */
	large_attr.attr3.attr_set = 0;
	large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY;
	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
				    (void *)&large_attr, sizeof(large_attr)), 0);

	expected_flags = old_flags;
	expected_flags |= MS_RDONLY;

	/* AT_RECURSIVE: every mount in the tree must now report read-only. */
	new_flags = read_mnt_flags("/mnt/A");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA/B");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
	ASSERT_EQ(new_flags, expected_flags);
}
535
536TEST_F(mount_setattr, basic)
537{
538	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
539	struct mount_attr attr = {
540		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
541		.attr_clr	= MOUNT_ATTR__ATIME,
542	};
543
544	if (!mount_setattr_supported())
545		SKIP(return, "mount_setattr syscall not supported");
546
547	old_flags = read_mnt_flags("/mnt/A");
548	ASSERT_GT(old_flags, 0);
549
550	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0);
551
552	expected_flags = old_flags;
553	expected_flags |= MS_RDONLY;
554	expected_flags |= MS_NOEXEC;
555	expected_flags &= ~MS_NOATIME;
556	expected_flags |= MS_RELATIME;
557
558	new_flags = read_mnt_flags("/mnt/A");
559	ASSERT_EQ(new_flags, expected_flags);
560
561	new_flags = read_mnt_flags("/mnt/A/AA");
562	ASSERT_EQ(new_flags, old_flags);
563
564	new_flags = read_mnt_flags("/mnt/A/AA/B");
565	ASSERT_EQ(new_flags, old_flags);
566
567	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
568	ASSERT_EQ(new_flags, old_flags);
569}
570
/*
 * Recursive variant of the basic test, in three phases:
 *  1. apply read-only/noexec/relatime to the whole "/mnt/A" tree and check
 *     all four mounts,
 *  2. clear read-only and request MS_SHARED propagation, then check flags
 *     and shared state on all four mounts,
 *  3. with a file held open for writing below the tree, a recursive
 *     read-only request must fail and leave flags and propagation of every
 *     mount unchanged.
 */
TEST_F(mount_setattr, basic_recursive)
{
	int fd;
	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
	struct mount_attr attr = {
		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
		.attr_clr	= MOUNT_ATTR__ATIME,
	};

	if (!mount_setattr_supported())
		SKIP(return, "mount_setattr syscall not supported");

	old_flags = read_mnt_flags("/mnt/A");
	ASSERT_GT(old_flags, 0);

	/* Phase 1: recursive attribute change. */
	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);

	expected_flags = old_flags;
	expected_flags |= MS_RDONLY;
	expected_flags |= MS_NOEXEC;
	expected_flags &= ~MS_NOATIME;
	expected_flags |= MS_RELATIME;

	new_flags = read_mnt_flags("/mnt/A");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA/B");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
	ASSERT_EQ(new_flags, expected_flags);

	/* Phase 2: drop read-only again and make the tree shared. */
	memset(&attr, 0, sizeof(attr));
	attr.attr_clr = MOUNT_ATTR_RDONLY;
	attr.propagation = MS_SHARED;
	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);

	expected_flags &= ~MS_RDONLY;
	new_flags = read_mnt_flags("/mnt/A");
	ASSERT_EQ(new_flags, expected_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A"), true);

	new_flags = read_mnt_flags("/mnt/A/AA");
	ASSERT_EQ(new_flags, expected_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);

	new_flags = read_mnt_flags("/mnt/A/AA/B");
	ASSERT_EQ(new_flags, expected_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);

	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
	ASSERT_EQ(new_flags, expected_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);

	/* Phase 3: create a writer inside the tree. */
	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
	ASSERT_GE(fd, 0);

	/*
	 * We're holding a fd open for writing so this needs to fail somewhere
	 * in the middle and the mount options need to be unchanged.
	 */
	attr.attr_set = MOUNT_ATTR_RDONLY;
	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);

	new_flags = read_mnt_flags("/mnt/A");
	ASSERT_EQ(new_flags, expected_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A"), true);

	new_flags = read_mnt_flags("/mnt/A/AA");
	ASSERT_EQ(new_flags, expected_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);

	new_flags = read_mnt_flags("/mnt/A/AA/B");
	ASSERT_EQ(new_flags, expected_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);

	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
	ASSERT_EQ(new_flags, expected_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);

	EXPECT_EQ(close(fd), 0);
}
664
/*
 * A recursive request that includes MOUNT_ATTR_RDONLY and MS_SHARED must
 * fail while a file below the tree is open for writing, leaving both the
 * flags and the (non-shared) propagation of all four mounts untouched.
 * Once the writer is closed the same request must succeed.
 */
TEST_F(mount_setattr, mount_has_writers)
{
	int fd, dfd;
	unsigned int old_flags = 0, new_flags = 0;
	struct mount_attr attr = {
		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
		.attr_clr	= MOUNT_ATTR__ATIME,
		.propagation	= MS_SHARED,
	};

	if (!mount_setattr_supported())
		SKIP(return, "mount_setattr syscall not supported");

	old_flags = read_mnt_flags("/mnt/A");
	ASSERT_GT(old_flags, 0);

	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
	ASSERT_GE(fd, 0);

	/*
	 * We're holding a fd open to a mount somewhere in the middle so this
	 * needs to fail somewhere in the middle. After this the mount options
	 * need to be unchanged.
	 */
	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);

	new_flags = read_mnt_flags("/mnt/A");
	ASSERT_EQ(new_flags, old_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A"), false);

	new_flags = read_mnt_flags("/mnt/A/AA");
	ASSERT_EQ(new_flags, old_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false);

	new_flags = read_mnt_flags("/mnt/A/AA/B");
	ASSERT_EQ(new_flags, old_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false);

	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
	ASSERT_EQ(new_flags, old_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false);

	/* Sync the containing directory, then the file, before closing them. */
	dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC);
	ASSERT_GE(dfd, 0);
	EXPECT_EQ(fsync(dfd), 0);
	EXPECT_EQ(close(dfd), 0);

	EXPECT_EQ(fsync(fd), 0);
	EXPECT_EQ(close(fd), 0);

	/* All writers are gone so this should succeed. */
	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
}
722
723TEST_F(mount_setattr, mixed_mount_options)
724{
725	unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0;
726	struct mount_attr attr = {
727		.attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME,
728		.attr_set = MOUNT_ATTR_RELATIME,
729	};
730
731	if (!mount_setattr_supported())
732		SKIP(return, "mount_setattr syscall not supported");
733
734	old_flags1 = read_mnt_flags("/mnt/B");
735	ASSERT_GT(old_flags1, 0);
736
737	old_flags2 = read_mnt_flags("/mnt/B/BB");
738	ASSERT_GT(old_flags2, 0);
739
740	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0);
741
742	expected_flags = old_flags2;
743	expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
744	expected_flags |= MS_RELATIME;
745
746	new_flags = read_mnt_flags("/mnt/B");
747	ASSERT_EQ(new_flags, expected_flags);
748
749	expected_flags = old_flags2;
750	expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
751	expected_flags |= MS_RELATIME;
752
753	new_flags = read_mnt_flags("/mnt/B/BB");
754	ASSERT_EQ(new_flags, expected_flags);
755}
756
757TEST_F(mount_setattr, time_changes)
758{
759	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
760	struct mount_attr attr = {
761		.attr_set	= MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME,
762	};
763
764	if (!mount_setattr_supported())
765		SKIP(return, "mount_setattr syscall not supported");
766
767	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
768
769	attr.attr_set = MOUNT_ATTR_STRICTATIME;
770	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
771
772	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
773	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
774
775	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
776	attr.attr_clr = MOUNT_ATTR__ATIME;
777	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
778
779	attr.attr_set = 0;
780	attr.attr_clr = MOUNT_ATTR_STRICTATIME;
781	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
782
783	attr.attr_clr = MOUNT_ATTR_NOATIME;
784	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
785
786	old_flags = read_mnt_flags("/mnt/A");
787	ASSERT_GT(old_flags, 0);
788
789	attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME;
790	attr.attr_clr = MOUNT_ATTR__ATIME;
791	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
792
793	expected_flags = old_flags;
794	expected_flags |= MS_NOATIME;
795	expected_flags |= MS_NODIRATIME;
796
797	new_flags = read_mnt_flags("/mnt/A");
798	ASSERT_EQ(new_flags, expected_flags);
799
800	new_flags = read_mnt_flags("/mnt/A/AA");
801	ASSERT_EQ(new_flags, expected_flags);
802
803	new_flags = read_mnt_flags("/mnt/A/AA/B");
804	ASSERT_EQ(new_flags, expected_flags);
805
806	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
807	ASSERT_EQ(new_flags, expected_flags);
808
809	memset(&attr, 0, sizeof(attr));
810	attr.attr_set &= ~MOUNT_ATTR_NOATIME;
811	attr.attr_set |= MOUNT_ATTR_RELATIME;
812	attr.attr_clr |= MOUNT_ATTR__ATIME;
813	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
814
815	expected_flags &= ~MS_NOATIME;
816	expected_flags |= MS_RELATIME;
817
818	new_flags = read_mnt_flags("/mnt/A");
819	ASSERT_EQ(new_flags, expected_flags);
820
821	new_flags = read_mnt_flags("/mnt/A/AA");
822	ASSERT_EQ(new_flags, expected_flags);
823
824	new_flags = read_mnt_flags("/mnt/A/AA/B");
825	ASSERT_EQ(new_flags, expected_flags);
826
827	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
828	ASSERT_EQ(new_flags, expected_flags);
829
830	memset(&attr, 0, sizeof(attr));
831	attr.attr_set &= ~MOUNT_ATTR_RELATIME;
832	attr.attr_set |= MOUNT_ATTR_STRICTATIME;
833	attr.attr_clr |= MOUNT_ATTR__ATIME;
834	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
835
836	expected_flags &= ~MS_RELATIME;
837
838	new_flags = read_mnt_flags("/mnt/A");
839	ASSERT_EQ(new_flags, expected_flags);
840
841	new_flags = read_mnt_flags("/mnt/A/AA");
842	ASSERT_EQ(new_flags, expected_flags);
843
844	new_flags = read_mnt_flags("/mnt/A/AA/B");
845	ASSERT_EQ(new_flags, expected_flags);
846
847	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
848	ASSERT_EQ(new_flags, expected_flags);
849
850	memset(&attr, 0, sizeof(attr));
851	attr.attr_set &= ~MOUNT_ATTR_STRICTATIME;
852	attr.attr_set |= MOUNT_ATTR_NOATIME;
853	attr.attr_clr |= MOUNT_ATTR__ATIME;
854	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
855
856	expected_flags |= MS_NOATIME;
857	new_flags = read_mnt_flags("/mnt/A");
858	ASSERT_EQ(new_flags, expected_flags);
859
860	new_flags = read_mnt_flags("/mnt/A/AA");
861	ASSERT_EQ(new_flags, expected_flags);
862
863	new_flags = read_mnt_flags("/mnt/A/AA/B");
864	ASSERT_EQ(new_flags, expected_flags);
865
866	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
867	ASSERT_EQ(new_flags, expected_flags);
868
869	memset(&attr, 0, sizeof(attr));
870	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
871
872	new_flags = read_mnt_flags("/mnt/A");
873	ASSERT_EQ(new_flags, expected_flags);
874
875	new_flags = read_mnt_flags("/mnt/A/AA");
876	ASSERT_EQ(new_flags, expected_flags);
877
878	new_flags = read_mnt_flags("/mnt/A/AA/B");
879	ASSERT_EQ(new_flags, expected_flags);
880
881	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
882	ASSERT_EQ(new_flags, expected_flags);
883
884	memset(&attr, 0, sizeof(attr));
885	attr.attr_clr = MOUNT_ATTR_NODIRATIME;
886	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
887
888	expected_flags &= ~MS_NODIRATIME;
889
890	new_flags = read_mnt_flags("/mnt/A");
891	ASSERT_EQ(new_flags, expected_flags);
892
893	new_flags = read_mnt_flags("/mnt/A/AA");
894	ASSERT_EQ(new_flags, expected_flags);
895
896	new_flags = read_mnt_flags("/mnt/A/AA/B");
897	ASSERT_EQ(new_flags, expected_flags);
898
899	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
900	ASSERT_EQ(new_flags, expected_flags);
901}
902
903TEST_F(mount_setattr, multi_threaded)
904{
905	int i, j, nthreads, ret = 0;
906	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
907	pthread_attr_t pattr;
908	pthread_t threads[DEFAULT_THREADS];
909
910	if (!mount_setattr_supported())
911		SKIP(return, "mount_setattr syscall not supported");
912
913	old_flags = read_mnt_flags("/mnt/A");
914	ASSERT_GT(old_flags, 0);
915
916	/* Try to change mount options from multiple threads. */
917	nthreads = get_nprocs_conf();
918	if (nthreads > DEFAULT_THREADS)
919		nthreads = DEFAULT_THREADS;
920
921	pthread_attr_init(&pattr);
922	for (i = 0; i < nthreads; i++)
923		ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0);
924
925	for (j = 0; j < i; j++) {
926		void *retptr = NULL;
927
928		EXPECT_EQ(pthread_join(threads[j], &retptr), 0);
929
930		ret += ptr_to_int(retptr);
931		EXPECT_EQ(ret, 0);
932	}
933	pthread_attr_destroy(&pattr);
934
935	ASSERT_EQ(ret, 0);
936
937	expected_flags = old_flags;
938	expected_flags |= MS_RDONLY;
939	expected_flags |= MS_NOSUID;
940	new_flags = read_mnt_flags("/mnt/A");
941	ASSERT_EQ(new_flags, expected_flags);
942
943	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
944
945	new_flags = read_mnt_flags("/mnt/A/AA");
946	ASSERT_EQ(new_flags, expected_flags);
947
948	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
949
950	new_flags = read_mnt_flags("/mnt/A/AA/B");
951	ASSERT_EQ(new_flags, expected_flags);
952
953	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
954
955	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
956	ASSERT_EQ(new_flags, expected_flags);
957
958	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
959}
960
961TEST_F(mount_setattr, wrong_user_namespace)
962{
963	int ret;
964	struct mount_attr attr = {
965		.attr_set = MOUNT_ATTR_RDONLY,
966	};
967
968	if (!mount_setattr_supported())
969		SKIP(return, "mount_setattr syscall not supported");
970
971	EXPECT_EQ(create_and_enter_userns(), 0);
972	ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr));
973	ASSERT_LT(ret, 0);
974	ASSERT_EQ(errno, EPERM);
975}
976
977TEST_F(mount_setattr, wrong_mount_namespace)
978{
979	int fd, ret;
980	struct mount_attr attr = {
981		.attr_set = MOUNT_ATTR_RDONLY,
982	};
983
984	if (!mount_setattr_supported())
985		SKIP(return, "mount_setattr syscall not supported");
986
987	fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC);
988	ASSERT_GE(fd, 0);
989
990	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
991
992	ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr));
993	ASSERT_LT(ret, 0);
994	ASSERT_EQ(errno, EINVAL);
995}
996
/* Fixture for the MOUNT_ATTR_IDMAP tests; it carries no per-test data. */
FIXTURE(mount_setattr_idmapped) {
};
999
/*
 * Build the mount tree for the idmapped tests in a private mount namespace
 * (no user namespace is created here). On top of nested tmpfs mounts under
 * /tmp and /mnt it creates a regular file "b" in /tmp/B and /tmp/B/BB
 * before each directory is mounted over, and finally an ext4 image at
 * /mnt/C/ext4.img that is loop-mounted on /mnt/D via the mkfs.ext4 and
 * mount command-line tools.
 */
FIXTURE_SETUP(mount_setattr_idmapped)
{
	int img_fd = -EBADF;

	ASSERT_EQ(unshare(CLONE_NEWNS), 0);

	ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0), 0);

	/* Best effort: drop whatever is currently mounted on these paths. */
	(void)umount2("/mnt", MNT_DETACH);
	(void)umount2("/tmp", MNT_DETACH);

	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	/* The file is created first, then a tmpfs is mounted over /tmp/B. */
	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0);
	ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0);

	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	/* Same pattern one level down. */
	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0);
	ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0);

	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);

	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);

	/* Expose the whole /tmp tree below /mnt/A/AA. */
	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);

	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);

	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
			MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);

	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);

	/* NOTE(review): mounts on /tmp/B/BB, not the /mnt/B/BB just created. */
	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
			MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);

	/* Create a 2 MiB image file, format it as ext4 and loop-mount it. */
	ASSERT_EQ(mkdir("/mnt/C", 0777), 0);
	ASSERT_EQ(mkdir("/mnt/D", 0777), 0);
	img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600);
	ASSERT_GE(img_fd, 0);
	ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0);
	ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0);
	ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0);
	ASSERT_EQ(close(img_fd), 0);
}
1059
/* Lazily detach the two mount trees named below; failures are ignored. */
FIXTURE_TEARDOWN(mount_setattr_idmapped)
{
	static const char *const mountpoints[] = { "/mnt/A", "/tmp" };
	size_t i;

	for (i = 0; i < sizeof(mountpoints) / sizeof(mountpoints[0]); i++)
		(void)umount2(mountpoints[i], MNT_DETACH);
}
1065
1066/**
1067 * Validate that negative fd values are rejected.
1068 */
1069TEST_F(mount_setattr_idmapped, invalid_fd_negative)
1070{
1071	struct mount_attr attr = {
1072		.attr_set	= MOUNT_ATTR_IDMAP,
1073		.userns_fd	= -EBADF,
1074	};
1075
1076	if (!mount_setattr_supported())
1077		SKIP(return, "mount_setattr syscall not supported");
1078
1079	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1080		TH_LOG("failure: created idmapped mount with negative fd");
1081	}
1082}
1083
1084/**
1085 * Validate that excessively large fd values are rejected.
1086 */
1087TEST_F(mount_setattr_idmapped, invalid_fd_large)
1088{
1089	struct mount_attr attr = {
1090		.attr_set	= MOUNT_ATTR_IDMAP,
1091		.userns_fd	= INT64_MAX,
1092	};
1093
1094	if (!mount_setattr_supported())
1095		SKIP(return, "mount_setattr syscall not supported");
1096
1097	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1098		TH_LOG("failure: created idmapped mount with too large fd value");
1099	}
1100}
1101
1102/**
1103 * Validate that closed fd values are rejected.
1104 */
1105TEST_F(mount_setattr_idmapped, invalid_fd_closed)
1106{
1107	int fd;
1108	struct mount_attr attr = {
1109		.attr_set = MOUNT_ATTR_IDMAP,
1110	};
1111
1112	if (!mount_setattr_supported())
1113		SKIP(return, "mount_setattr syscall not supported");
1114
1115	fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
1116	ASSERT_GE(fd, 0);
1117	ASSERT_GE(close(fd), 0);
1118
1119	attr.userns_fd = fd;
1120	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1121		TH_LOG("failure: created idmapped mount with closed fd");
1122	}
1123}
1124
1125/**
1126 * Validate that the initial user namespace is rejected.
1127 */
1128TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns)
1129{
1130	int open_tree_fd = -EBADF;
1131	struct mount_attr attr = {
1132		.attr_set = MOUNT_ATTR_IDMAP,
1133	};
1134
1135	if (!mount_setattr_supported())
1136		SKIP(return, "mount_setattr syscall not supported");
1137
1138	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1139				     AT_NO_AUTOMOUNT |
1140				     AT_SYMLINK_NOFOLLOW |
1141				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1142	ASSERT_GE(open_tree_fd, 0);
1143
1144	attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC);
1145	ASSERT_GE(attr.userns_fd, 0);
1146	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1147	ASSERT_EQ(errno, EPERM);
1148	ASSERT_EQ(close(attr.userns_fd), 0);
1149	ASSERT_EQ(close(open_tree_fd), 0);
1150}
1151
/*
 * Write the single mapping line "<nsid> <hostid> <range>" to
 * /proc/<pid>/uid_map and then to /proc/<pid>/gid_map.
 *
 * Returns 0 when both write_file() calls report success and -1 as soon as one
 * of them reports a failure (gid_map is not attempted if uid_map failed).
 */
static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid,
		   unsigned long range)
{
	static const char *const map_files[] = { "uid_map", "gid_map" };
	char map[100], procfile[256];
	size_t i;

	/* Both files receive the very same mapping line. */
	snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range);

	for (i = 0; i < sizeof(map_files) / sizeof(map_files[0]); i++) {
		snprintf(procfile, sizeof(procfile), "/proc/%d/%s", pid,
			 map_files[i]);
		if (write_file(procfile, map, strlen(map)))
			return -1;
	}

	return 0;
}
1170
#define __STACK_SIZE (8 * 1024 * 1024)
/*
 * Create a child with clone() (or __clone2() on ia64) that runs fn(arg) on a
 * freshly allocated stack of __STACK_SIZE bytes.  SIGCHLD is always OR-ed
 * into @flags.
 *
 * Returns the value returned by clone()/__clone2() (the child's pid, or -1
 * with errno set), or -ENOMEM if the stack could not be allocated.
 *
 * The parent's stack buffer is released before returning unless the child
 * was created with CLONE_VM and is therefore still using that very memory.
 */
static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
{
	void *stack;
	pid_t pid;
	int saved_errno;

	stack = malloc(__STACK_SIZE);
	if (!stack)
		return -ENOMEM;

#ifdef __ia64__
	pid = __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#else
	/* clone() is given the upper end of the buffer as the stack address. */
	pid = clone(fn, (char *)stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#endif

	/*
	 * Without CLONE_VM the child runs on its own copy of the address
	 * space, so the parent's copy of the buffer is no longer needed.  If
	 * no child was created at all, nobody uses the buffer either.  Keep
	 * errno intact because callers inspect it after a failed clone.
	 */
	if (pid < 0 || !(flags & CLONE_VM)) {
		saved_errno = errno;
		free(stack);
		errno = saved_errno;
	}

	return pid;
}
1186
/*
 * Child entry point handed to do_clone() by get_userns_fd(): the process
 * sends SIGSTOP to itself and returns whatever kill() returned.  @data is
 * not used.
 */
static int get_userns_fd_cb(void *data)
{
	pid_t self = getpid();

	(void)data;
	return kill(self, SIGSTOP);
}
1191
/*
 * Wait for child @pid to change state and report how it ended.
 *
 * Returns the child's exit status if it exited normally, and -1 if waitpid()
 * failed for a reason other than EINTR or if the child did not exit normally.
 */
static int wait_for_pid(pid_t pid)
{
	int status;

	/* Keep retrying while the wait is merely interrupted by a signal. */
	while (waitpid(pid, &status, 0) == -1) {
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(status) ? WEXITSTATUS(status) : -1;
}
1210
1211static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
1212{
1213	int ret;
1214	pid_t pid;
1215	char path[256];
1216
1217	pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER);
1218	if (pid < 0)
1219		return -errno;
1220
1221	ret = map_ids(pid, nsid, hostid, range);
1222	if (ret < 0)
1223		return ret;
1224
1225	snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
1226	ret = open(path, O_RDONLY | O_CLOEXEC);
1227	kill(pid, SIGKILL);
1228	wait_for_pid(pid);
1229	return ret;
1230}
1231
1232/**
1233 * Validate that an attached mount in our mount namespace cannot be idmapped.
1234 * (The kernel enforces that the mount's mount namespace and the caller's mount
1235 *  namespace match.)
1236 */
1237TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
1238{
1239	int open_tree_fd = -EBADF;
1240	struct mount_attr attr = {
1241		.attr_set = MOUNT_ATTR_IDMAP,
1242	};
1243
1244	if (!mount_setattr_supported())
1245		SKIP(return, "mount_setattr syscall not supported");
1246
1247	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1248				     AT_EMPTY_PATH |
1249				     AT_NO_AUTOMOUNT |
1250				     AT_SYMLINK_NOFOLLOW |
1251				     OPEN_TREE_CLOEXEC);
1252	ASSERT_GE(open_tree_fd, 0);
1253
1254	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1255	ASSERT_GE(attr.userns_fd, 0);
1256	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1257	ASSERT_EQ(close(attr.userns_fd), 0);
1258	ASSERT_EQ(close(open_tree_fd), 0);
1259}
1260
1261/**
1262 * Validate that idmapping a mount is rejected if the mount's mount namespace
1263 * and our mount namespace don't match.
1264 * (The kernel enforces that the mount's mount namespace and the caller's mount
1265 *  namespace match.)
1266 */
1267TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace)
1268{
1269	int open_tree_fd = -EBADF;
1270	struct mount_attr attr = {
1271		.attr_set = MOUNT_ATTR_IDMAP,
1272	};
1273
1274	if (!mount_setattr_supported())
1275		SKIP(return, "mount_setattr syscall not supported");
1276
1277	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1278				     AT_EMPTY_PATH |
1279				     AT_NO_AUTOMOUNT |
1280				     AT_SYMLINK_NOFOLLOW |
1281				     OPEN_TREE_CLOEXEC);
1282	ASSERT_GE(open_tree_fd, 0);
1283
1284	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1285
1286	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1287	ASSERT_GE(attr.userns_fd, 0);
1288	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
1289				    sizeof(attr)), 0);
1290	ASSERT_EQ(close(attr.userns_fd), 0);
1291	ASSERT_EQ(close(open_tree_fd), 0);
1292}
1293
1294/**
1295 * Validate that an attached mount in our mount namespace can be idmapped.
1296 */
1297TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace)
1298{
1299	int open_tree_fd = -EBADF;
1300	struct mount_attr attr = {
1301		.attr_set = MOUNT_ATTR_IDMAP,
1302	};
1303
1304	if (!mount_setattr_supported())
1305		SKIP(return, "mount_setattr syscall not supported");
1306
1307	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1308				     AT_EMPTY_PATH |
1309				     AT_NO_AUTOMOUNT |
1310				     AT_SYMLINK_NOFOLLOW |
1311				     OPEN_TREE_CLOEXEC |
1312				     OPEN_TREE_CLONE);
1313	ASSERT_GE(open_tree_fd, 0);
1314
1315	/* Changing mount properties on a detached mount. */
1316	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1317	ASSERT_GE(attr.userns_fd, 0);
1318	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1319				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1320	ASSERT_EQ(close(attr.userns_fd), 0);
1321	ASSERT_EQ(close(open_tree_fd), 0);
1322}
1323
1324/**
1325 * Validate that a detached mount not in our mount namespace can be idmapped.
1326 */
1327TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace)
1328{
1329	int open_tree_fd = -EBADF;
1330	struct mount_attr attr = {
1331		.attr_set = MOUNT_ATTR_IDMAP,
1332	};
1333
1334	if (!mount_setattr_supported())
1335		SKIP(return, "mount_setattr syscall not supported");
1336
1337	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1338				     AT_EMPTY_PATH |
1339				     AT_NO_AUTOMOUNT |
1340				     AT_SYMLINK_NOFOLLOW |
1341				     OPEN_TREE_CLOEXEC |
1342				     OPEN_TREE_CLONE);
1343	ASSERT_GE(open_tree_fd, 0);
1344
1345	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1346
1347	/* Changing mount properties on a detached mount. */
1348	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1349	ASSERT_GE(attr.userns_fd, 0);
1350	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1351				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1352	ASSERT_EQ(close(attr.userns_fd), 0);
1353	ASSERT_EQ(close(open_tree_fd), 0);
1354}
1355
1356/**
1357 * Validate that currently changing the idmapping of an idmapped mount fails.
1358 */
1359TEST_F(mount_setattr_idmapped, change_idmapping)
1360{
1361	int open_tree_fd = -EBADF;
1362	struct mount_attr attr = {
1363		.attr_set = MOUNT_ATTR_IDMAP,
1364	};
1365
1366	if (!mount_setattr_supported())
1367		SKIP(return, "mount_setattr syscall not supported");
1368
1369	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1370				     AT_EMPTY_PATH |
1371				     AT_NO_AUTOMOUNT |
1372				     AT_SYMLINK_NOFOLLOW |
1373				     OPEN_TREE_CLOEXEC |
1374				     OPEN_TREE_CLONE);
1375	ASSERT_GE(open_tree_fd, 0);
1376
1377	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1378	ASSERT_GE(attr.userns_fd, 0);
1379	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1380				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1381	ASSERT_EQ(close(attr.userns_fd), 0);
1382
1383	/* Change idmapping on a detached mount that is already idmapped. */
1384	attr.userns_fd	= get_userns_fd(0, 20000, 10000);
1385	ASSERT_GE(attr.userns_fd, 0);
1386	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1387	ASSERT_EQ(close(attr.userns_fd), 0);
1388	ASSERT_EQ(close(open_tree_fd), 0);
1389}
1390
/*
 * Check the ownership of @path (resolved relative to @dfd, with @flags passed
 * through to fstatat()).
 *
 * Returns true only if fstatat() succeeds and the file's uid and gid equal
 * @expected_uid and @expected_gid; returns false on a mismatch and also when
 * fstatat() fails.
 */
static bool expected_uid_gid(int dfd, const char *path, int flags,
			     uid_t expected_uid, gid_t expected_gid)
{
	struct stat st;

	if (fstatat(dfd, path, &st, flags) < 0)
		return false;

	if (st.st_uid != expected_uid)
		return false;

	return st.st_gid == expected_gid;
}
1403
1404TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
1405{
1406	int open_tree_fd = -EBADF;
1407	struct mount_attr attr = {
1408		.attr_set = MOUNT_ATTR_IDMAP,
1409	};
1410
1411	if (!mount_setattr_supported())
1412		SKIP(return, "mount_setattr syscall not supported");
1413
1414	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1415	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1416
1417	open_tree_fd = sys_open_tree(-EBADF, "/mnt/A",
1418				     AT_RECURSIVE |
1419				     AT_EMPTY_PATH |
1420				     AT_NO_AUTOMOUNT |
1421				     AT_SYMLINK_NOFOLLOW |
1422				     OPEN_TREE_CLOEXEC |
1423				     OPEN_TREE_CLONE);
1424	ASSERT_GE(open_tree_fd, 0);
1425
1426	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1427	ASSERT_GE(attr.userns_fd, 0);
1428	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1429	ASSERT_EQ(close(attr.userns_fd), 0);
1430	ASSERT_EQ(close(open_tree_fd), 0);
1431
1432	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1433	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1434	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0);
1435	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
1436}
1437
1438TEST_F(mount_setattr, mount_attr_nosymfollow)
1439{
1440	int fd;
1441	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
1442	struct mount_attr attr = {
1443		.attr_set	= MOUNT_ATTR_NOSYMFOLLOW,
1444	};
1445
1446	if (!mount_setattr_supported())
1447		SKIP(return, "mount_setattr syscall not supported");
1448
1449	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1450	ASSERT_GT(fd, 0);
1451	ASSERT_EQ(close(fd), 0);
1452
1453	old_flags = read_mnt_flags("/mnt/A");
1454	ASSERT_GT(old_flags, 0);
1455
1456	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1457
1458	expected_flags = old_flags;
1459	expected_flags |= ST_NOSYMFOLLOW;
1460
1461	new_flags = read_mnt_flags("/mnt/A");
1462	ASSERT_EQ(new_flags, expected_flags);
1463
1464	new_flags = read_mnt_flags("/mnt/A/AA");
1465	ASSERT_EQ(new_flags, expected_flags);
1466
1467	new_flags = read_mnt_flags("/mnt/A/AA/B");
1468	ASSERT_EQ(new_flags, expected_flags);
1469
1470	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1471	ASSERT_EQ(new_flags, expected_flags);
1472
1473	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1474	ASSERT_LT(fd, 0);
1475	ASSERT_EQ(errno, ELOOP);
1476
1477	attr.attr_set &= ~MOUNT_ATTR_NOSYMFOLLOW;
1478	attr.attr_clr |= MOUNT_ATTR_NOSYMFOLLOW;
1479
1480	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1481
1482	expected_flags &= ~ST_NOSYMFOLLOW;
1483	new_flags = read_mnt_flags("/mnt/A");
1484	ASSERT_EQ(new_flags, expected_flags);
1485
1486	new_flags = read_mnt_flags("/mnt/A/AA");
1487	ASSERT_EQ(new_flags, expected_flags);
1488
1489	new_flags = read_mnt_flags("/mnt/A/AA/B");
1490	ASSERT_EQ(new_flags, expected_flags);
1491
1492	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1493	ASSERT_EQ(new_flags, expected_flags);
1494
1495	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1496	ASSERT_GT(fd, 0);
1497	ASSERT_EQ(close(fd), 0);
1498}
1499
1500TEST_HARNESS_MAIN
1501