OSDN Git Service

Merge tag 'trace-v6.3-rc5-2' of git://git.kernel.org/pub/scm/linux/kernel/git/trace...
[tomoyo/tomoyo-test1.git] / tools / testing / selftests / mount_setattr / mount_setattr_test.c
1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <sched.h>
4 #include <stdio.h>
5 #include <errno.h>
6 #include <pthread.h>
7 #include <string.h>
8 #include <sys/stat.h>
9 #include <sys/types.h>
10 #include <sys/mount.h>
11 #include <sys/wait.h>
12 #include <sys/vfs.h>
13 #include <sys/statvfs.h>
14 #include <sys/sysinfo.h>
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <fcntl.h>
18 #include <grp.h>
19 #include <stdbool.h>
20 #include <stdarg.h>
21 #include <linux/mount.h>
22
23 #include "../kselftest_harness.h"
24
25 #ifndef CLONE_NEWNS
26 #define CLONE_NEWNS 0x00020000
27 #endif
28
29 #ifndef CLONE_NEWUSER
30 #define CLONE_NEWUSER 0x10000000
31 #endif
32
33 #ifndef MS_REC
34 #define MS_REC 16384
35 #endif
36
37 #ifndef MS_RELATIME
38 #define MS_RELATIME (1 << 21)
39 #endif
40
41 #ifndef MS_STRICTATIME
42 #define MS_STRICTATIME (1 << 24)
43 #endif
44
45 #ifndef MOUNT_ATTR_RDONLY
46 #define MOUNT_ATTR_RDONLY 0x00000001
47 #endif
48
49 #ifndef MOUNT_ATTR_NOSUID
50 #define MOUNT_ATTR_NOSUID 0x00000002
51 #endif
52
53 #ifndef MOUNT_ATTR_NOEXEC
54 #define MOUNT_ATTR_NOEXEC 0x00000008
55 #endif
56
57 #ifndef MOUNT_ATTR_NODIRATIME
58 #define MOUNT_ATTR_NODIRATIME 0x00000080
59 #endif
60
61 #ifndef MOUNT_ATTR__ATIME
62 #define MOUNT_ATTR__ATIME 0x00000070
63 #endif
64
65 #ifndef MOUNT_ATTR_RELATIME
66 #define MOUNT_ATTR_RELATIME 0x00000000
67 #endif
68
69 #ifndef MOUNT_ATTR_NOATIME
70 #define MOUNT_ATTR_NOATIME 0x00000010
71 #endif
72
73 #ifndef MOUNT_ATTR_STRICTATIME
74 #define MOUNT_ATTR_STRICTATIME 0x00000020
75 #endif
76
77 #ifndef AT_RECURSIVE
78 #define AT_RECURSIVE 0x8000
79 #endif
80
81 #ifndef MS_SHARED
82 #define MS_SHARED (1 << 20)
83 #endif
84
85 #define DEFAULT_THREADS 4
86 #define ptr_to_int(p) ((int)((intptr_t)(p)))
87 #define int_to_ptr(u) ((void *)((intptr_t)(u)))
88
89 #ifndef __NR_mount_setattr
90         #if defined __alpha__
91                 #define __NR_mount_setattr 552
92         #elif defined _MIPS_SIM
93                 #if _MIPS_SIM == _MIPS_SIM_ABI32        /* o32 */
94                         #define __NR_mount_setattr (442 + 4000)
95                 #endif
96                 #if _MIPS_SIM == _MIPS_SIM_NABI32       /* n32 */
97                         #define __NR_mount_setattr (442 + 6000)
98                 #endif
99                 #if _MIPS_SIM == _MIPS_SIM_ABI64        /* n64 */
100                         #define __NR_mount_setattr (442 + 5000)
101                 #endif
102         #elif defined __ia64__
103                 #define __NR_mount_setattr (442 + 1024)
104         #else
105                 #define __NR_mount_setattr 442
106         #endif
107 #endif
108
109 #ifndef __NR_open_tree
110         #if defined __alpha__
111                 #define __NR_open_tree 538
112         #elif defined _MIPS_SIM
113                 #if _MIPS_SIM == _MIPS_SIM_ABI32        /* o32 */
114                         #define __NR_open_tree 4428
115                 #endif
116                 #if _MIPS_SIM == _MIPS_SIM_NABI32       /* n32 */
117                         #define __NR_open_tree 6428
118                 #endif
119                 #if _MIPS_SIM == _MIPS_SIM_ABI64        /* n64 */
120                         #define __NR_open_tree 5428
121                 #endif
122         #elif defined __ia64__
123                 #define __NR_open_tree (428 + 1024)
124         #else
125                 #define __NR_open_tree 428
126         #endif
127 #endif
128
129 #ifndef MOUNT_ATTR_IDMAP
130 #define MOUNT_ATTR_IDMAP 0x00100000
131 #endif
132
133 #ifndef MOUNT_ATTR_NOSYMFOLLOW
134 #define MOUNT_ATTR_NOSYMFOLLOW 0x00200000
135 #endif
136
/*
 * Thin wrapper around the raw mount_setattr system call.
 *
 * All five arguments are forwarded unchanged to syscall(2); the result of
 * syscall() is returned narrowed to int.
 */
static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
				    struct mount_attr *attr, size_t size)
{
	long rc = syscall(__NR_mount_setattr, dfd, path, flags, attr, size);

	return (int)rc;
}
142
143 #ifndef OPEN_TREE_CLONE
144 #define OPEN_TREE_CLONE 1
145 #endif
146
147 #ifndef OPEN_TREE_CLOEXEC
148 #define OPEN_TREE_CLOEXEC O_CLOEXEC
149 #endif
150
151 #ifndef AT_RECURSIVE
152 #define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
153 #endif
154
/*
 * Thin wrapper around the raw open_tree system call.
 *
 * The three arguments are forwarded unchanged to syscall(2); the result of
 * syscall() is returned narrowed to int.
 */
static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
{
	long rc = syscall(__NR_open_tree, dfd, filename, flags);

	return (int)rc;
}
159
/*
 * write(2) that is transparently restarted whenever it fails with EINTR.
 *
 * Returns the result of the first write() call that either succeeds or
 * fails with an errno other than EINTR.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t written = write(fd, buf, count);

		if (written >= 0 || errno != EINTR)
			return written;
	}
}
170
171 static int write_file(const char *path, const void *buf, size_t count)
172 {
173         int fd;
174         ssize_t ret;
175
176         fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
177         if (fd < 0)
178                 return -1;
179
180         ret = write_nointr(fd, buf, count);
181         close(fd);
182         if (ret < 0 || (size_t)ret != count)
183                 return -1;
184
185         return 0;
186 }
187
188 static int create_and_enter_userns(void)
189 {
190         uid_t uid;
191         gid_t gid;
192         char map[100];
193
194         uid = getuid();
195         gid = getgid();
196
197         if (unshare(CLONE_NEWUSER))
198                 return -1;
199
200         if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
201             errno != ENOENT)
202                 return -1;
203
204         snprintf(map, sizeof(map), "0 %d 1", uid);
205         if (write_file("/proc/self/uid_map", map, strlen(map)))
206                 return -1;
207
208
209         snprintf(map, sizeof(map), "0 %d 1", gid);
210         if (write_file("/proc/self/gid_map", map, strlen(map)))
211                 return -1;
212
213         if (setgid(0))
214                 return -1;
215
216         if (setuid(0))
217                 return -1;
218
219         return 0;
220 }
221
222 static int prepare_unpriv_mountns(void)
223 {
224         if (create_and_enter_userns())
225                 return -1;
226
227         if (unshare(CLONE_NEWNS))
228                 return -1;
229
230         if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
231                 return -1;
232
233         return 0;
234 }
235
236 #ifndef ST_NOSYMFOLLOW
237 #define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */
238 #endif
239
240 static int read_mnt_flags(const char *path)
241 {
242         int ret;
243         struct statvfs stat;
244         unsigned int mnt_flags;
245
246         ret = statvfs(path, &stat);
247         if (ret != 0)
248                 return -EINVAL;
249
250         if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC |
251                             ST_NOATIME | ST_NODIRATIME | ST_RELATIME |
252                             ST_SYNCHRONOUS | ST_MANDLOCK | ST_NOSYMFOLLOW))
253                 return -EINVAL;
254
255         mnt_flags = 0;
256         if (stat.f_flag & ST_RDONLY)
257                 mnt_flags |= MS_RDONLY;
258         if (stat.f_flag & ST_NOSUID)
259                 mnt_flags |= MS_NOSUID;
260         if (stat.f_flag & ST_NODEV)
261                 mnt_flags |= MS_NODEV;
262         if (stat.f_flag & ST_NOEXEC)
263                 mnt_flags |= MS_NOEXEC;
264         if (stat.f_flag & ST_NOATIME)
265                 mnt_flags |= MS_NOATIME;
266         if (stat.f_flag & ST_NODIRATIME)
267                 mnt_flags |= MS_NODIRATIME;
268         if (stat.f_flag & ST_RELATIME)
269                 mnt_flags |= MS_RELATIME;
270         if (stat.f_flag & ST_SYNCHRONOUS)
271                 mnt_flags |= MS_SYNCHRONOUS;
272         if (stat.f_flag & ST_MANDLOCK)
273                 mnt_flags |= ST_MANDLOCK;
274         if (stat.f_flag & ST_NOSYMFOLLOW)
275                 mnt_flags |= ST_NOSYMFOLLOW;
276
277         return mnt_flags;
278 }
279
/*
 * Skip @nfields space- or tab-terminated fields in @src.
 *
 * Each step advances past one run of non-separator characters and the single
 * separator that follows it. Returns a pointer to the character after the
 * last skipped separator, or to the terminating NUL if the string ends
 * first. The result is never NULL.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;
	int remaining = nfields;

	while (remaining-- > 0) {
		cur += strcspn(cur, " \t");
		if (*cur == '\0')
			break;
		cur++;
	}

	return cur;
}
297
/*
 * Terminate @word in place at its first space or tab. If there is none, the
 * existing terminator is simply rewritten.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
304
/*
 * Report whether the mount whose mount point is exactly @path is marked
 * shared according to /proc/self/mountinfo.
 *
 * For each mountinfo line the fifth field is compared against @path; on a
 * match, the first word two fields later is searched for "shared:".
 *
 * Returns true if a matching line carries that tag, false otherwise
 * (including when mountinfo cannot be opened).
 */
static bool is_shared_mount(const char *path)
{
	size_t len = 0;
	char *line = NULL;
	bool shared = false;
	FILE *f;

	f = fopen("/proc/self/mountinfo", "re");
	if (!f)
		return false;

	while (getline(&line, &len, f) != -1) {
		char *opts, *target;

		target = get_field(line, 4);
		if (!target)
			continue;

		/* Locate opts before target gets NUL-terminated below. */
		opts = get_field(target, 2);
		if (!opts)
			continue;

		null_endofword(target);

		if (strcmp(target, path) != 0)
			continue;

		null_endofword(opts);
		if (strstr(opts, "shared:")) {
			/*
			 * Leave the loop instead of returning here so the
			 * line buffer and the stream are released below.
			 */
			shared = true;
			break;
		}
	}

	free(line);
	fclose(f);

	return shared;
}
341
342 static void *mount_setattr_thread(void *data)
343 {
344         struct mount_attr attr = {
345                 .attr_set       = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
346                 .attr_clr       = 0,
347                 .propagation    = MS_SHARED,
348         };
349
350         if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)))
351                 pthread_exit(int_to_ptr(-1));
352
353         pthread_exit(int_to_ptr(0));
354 }
355
356 /* Attempt to de-conflict with the selftests tree. */
357 #ifndef SKIP
358 #define SKIP(s, ...)    XFAIL(s, ##__VA_ARGS__)
359 #endif
360
361 static bool mount_setattr_supported(void)
362 {
363         int ret;
364
365         ret = sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0);
366         if (ret < 0 && errno == ENOSYS)
367                 return false;
368
369         return true;
370 }
371
/* Fixture shared by the mount_setattr tests; it carries no per-test data. */
FIXTURE(mount_setattr) {
};
374
375 #define NOSYMFOLLOW_TARGET "/mnt/A/AA/data"
376 #define NOSYMFOLLOW_SYMLINK "/mnt/A/AA/symlink"
377
378 FIXTURE_SETUP(mount_setattr)
379 {
380         int fd = -EBADF;
381
382         if (!mount_setattr_supported())
383                 SKIP(return, "mount_setattr syscall not supported");
384
385         ASSERT_EQ(prepare_unpriv_mountns(), 0);
386
387         (void)umount2("/mnt", MNT_DETACH);
388         (void)umount2("/tmp", MNT_DETACH);
389
390         ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
391                         "size=100000,mode=700"), 0);
392
393         ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
394
395         ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
396                         "size=100000,mode=700"), 0);
397
398         ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
399
400         ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
401                         "size=100000,mode=700"), 0);
402
403         ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
404                         "size=100000,mode=700"), 0);
405
406         ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
407
408         ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
409                         "size=100000,mode=700"), 0);
410
411         ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
412
413         ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
414
415         ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
416
417         ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
418                         MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
419
420         ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
421
422         ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
423                         MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
424
425         fd = creat(NOSYMFOLLOW_TARGET, O_RDWR | O_CLOEXEC);
426         ASSERT_GT(fd, 0);
427         ASSERT_EQ(symlink(NOSYMFOLLOW_TARGET, NOSYMFOLLOW_SYMLINK), 0);
428         ASSERT_EQ(close(fd), 0);
429 }
430
431 FIXTURE_TEARDOWN(mount_setattr)
432 {
433         if (!mount_setattr_supported())
434                 SKIP(return, "mount_setattr syscall not supported");
435
436         (void)umount2("/mnt/A", MNT_DETACH);
437         (void)umount2("/tmp", MNT_DETACH);
438 }
439
440 TEST_F(mount_setattr, invalid_attributes)
441 {
442         struct mount_attr invalid_attr = {
443                 .attr_set = (1U << 31),
444         };
445
446         if (!mount_setattr_supported())
447                 SKIP(return, "mount_setattr syscall not supported");
448
449         ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
450                                     sizeof(invalid_attr)), 0);
451
452         invalid_attr.attr_set   = 0;
453         invalid_attr.attr_clr   = (1U << 31);
454         ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
455                                     sizeof(invalid_attr)), 0);
456
457         invalid_attr.attr_clr           = 0;
458         invalid_attr.propagation        = (1U << 31);
459         ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
460                                     sizeof(invalid_attr)), 0);
461
462         invalid_attr.attr_set           = (1U << 31);
463         invalid_attr.attr_clr           = (1U << 31);
464         invalid_attr.propagation        = (1U << 31);
465         ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
466                                     sizeof(invalid_attr)), 0);
467
468         ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr,
469                                     sizeof(invalid_attr)), 0);
470 }
471
472 TEST_F(mount_setattr, extensibility)
473 {
474         unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
475         char *s = "dummy";
476         struct mount_attr invalid_attr = {};
477         struct mount_attr_large {
478                 struct mount_attr attr1;
479                 struct mount_attr attr2;
480                 struct mount_attr attr3;
481         } large_attr = {};
482
483         if (!mount_setattr_supported())
484                 SKIP(return, "mount_setattr syscall not supported");
485
486         old_flags = read_mnt_flags("/mnt/A");
487         ASSERT_GT(old_flags, 0);
488
489         ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL,
490                                     sizeof(invalid_attr)), 0);
491         ASSERT_EQ(errno, EFAULT);
492
493         ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s,
494                                     sizeof(invalid_attr)), 0);
495         ASSERT_EQ(errno, EINVAL);
496
497         ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0);
498         ASSERT_EQ(errno, EINVAL);
499
500         ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
501                                     sizeof(invalid_attr) / 2), 0);
502         ASSERT_EQ(errno, EINVAL);
503
504         ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
505                                     sizeof(invalid_attr) / 2), 0);
506         ASSERT_EQ(errno, EINVAL);
507
508         ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
509                                     (void *)&large_attr, sizeof(large_attr)), 0);
510
511         large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY;
512         ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
513                                     (void *)&large_attr, sizeof(large_attr)), 0);
514
515         large_attr.attr3.attr_set = 0;
516         large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY;
517         ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
518                                     (void *)&large_attr, sizeof(large_attr)), 0);
519
520         expected_flags = old_flags;
521         expected_flags |= MS_RDONLY;
522
523         new_flags = read_mnt_flags("/mnt/A");
524         ASSERT_EQ(new_flags, expected_flags);
525
526         new_flags = read_mnt_flags("/mnt/A/AA");
527         ASSERT_EQ(new_flags, expected_flags);
528
529         new_flags = read_mnt_flags("/mnt/A/AA/B");
530         ASSERT_EQ(new_flags, expected_flags);
531
532         new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
533         ASSERT_EQ(new_flags, expected_flags);
534 }
535
536 TEST_F(mount_setattr, basic)
537 {
538         unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
539         struct mount_attr attr = {
540                 .attr_set       = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
541                 .attr_clr       = MOUNT_ATTR__ATIME,
542         };
543
544         if (!mount_setattr_supported())
545                 SKIP(return, "mount_setattr syscall not supported");
546
547         old_flags = read_mnt_flags("/mnt/A");
548         ASSERT_GT(old_flags, 0);
549
550         ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0);
551
552         expected_flags = old_flags;
553         expected_flags |= MS_RDONLY;
554         expected_flags |= MS_NOEXEC;
555         expected_flags &= ~MS_NOATIME;
556         expected_flags |= MS_RELATIME;
557
558         new_flags = read_mnt_flags("/mnt/A");
559         ASSERT_EQ(new_flags, expected_flags);
560
561         new_flags = read_mnt_flags("/mnt/A/AA");
562         ASSERT_EQ(new_flags, old_flags);
563
564         new_flags = read_mnt_flags("/mnt/A/AA/B");
565         ASSERT_EQ(new_flags, old_flags);
566
567         new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
568         ASSERT_EQ(new_flags, old_flags);
569 }
570
571 TEST_F(mount_setattr, basic_recursive)
572 {
573         int fd;
574         unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
575         struct mount_attr attr = {
576                 .attr_set       = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
577                 .attr_clr       = MOUNT_ATTR__ATIME,
578         };
579
580         if (!mount_setattr_supported())
581                 SKIP(return, "mount_setattr syscall not supported");
582
583         old_flags = read_mnt_flags("/mnt/A");
584         ASSERT_GT(old_flags, 0);
585
586         ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
587
588         expected_flags = old_flags;
589         expected_flags |= MS_RDONLY;
590         expected_flags |= MS_NOEXEC;
591         expected_flags &= ~MS_NOATIME;
592         expected_flags |= MS_RELATIME;
593
594         new_flags = read_mnt_flags("/mnt/A");
595         ASSERT_EQ(new_flags, expected_flags);
596
597         new_flags = read_mnt_flags("/mnt/A/AA");
598         ASSERT_EQ(new_flags, expected_flags);
599
600         new_flags = read_mnt_flags("/mnt/A/AA/B");
601         ASSERT_EQ(new_flags, expected_flags);
602
603         new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
604         ASSERT_EQ(new_flags, expected_flags);
605
606         memset(&attr, 0, sizeof(attr));
607         attr.attr_clr = MOUNT_ATTR_RDONLY;
608         attr.propagation = MS_SHARED;
609         ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
610
611         expected_flags &= ~MS_RDONLY;
612         new_flags = read_mnt_flags("/mnt/A");
613         ASSERT_EQ(new_flags, expected_flags);
614
615         ASSERT_EQ(is_shared_mount("/mnt/A"), true);
616
617         new_flags = read_mnt_flags("/mnt/A/AA");
618         ASSERT_EQ(new_flags, expected_flags);
619
620         ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
621
622         new_flags = read_mnt_flags("/mnt/A/AA/B");
623         ASSERT_EQ(new_flags, expected_flags);
624
625         ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
626
627         new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
628         ASSERT_EQ(new_flags, expected_flags);
629
630         ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
631
632         fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
633         ASSERT_GE(fd, 0);
634
635         /*
636          * We're holding a fd open for writing so this needs to fail somewhere
637          * in the middle and the mount options need to be unchanged.
638          */
639         attr.attr_set = MOUNT_ATTR_RDONLY;
640         ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
641
642         new_flags = read_mnt_flags("/mnt/A");
643         ASSERT_EQ(new_flags, expected_flags);
644
645         ASSERT_EQ(is_shared_mount("/mnt/A"), true);
646
647         new_flags = read_mnt_flags("/mnt/A/AA");
648         ASSERT_EQ(new_flags, expected_flags);
649
650         ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
651
652         new_flags = read_mnt_flags("/mnt/A/AA/B");
653         ASSERT_EQ(new_flags, expected_flags);
654
655         ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
656
657         new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
658         ASSERT_EQ(new_flags, expected_flags);
659
660         ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
661
662         EXPECT_EQ(close(fd), 0);
663 }
664
665 TEST_F(mount_setattr, mount_has_writers)
666 {
667         int fd, dfd;
668         unsigned int old_flags = 0, new_flags = 0;
669         struct mount_attr attr = {
670                 .attr_set       = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
671                 .attr_clr       = MOUNT_ATTR__ATIME,
672                 .propagation    = MS_SHARED,
673         };
674
675         if (!mount_setattr_supported())
676                 SKIP(return, "mount_setattr syscall not supported");
677
678         old_flags = read_mnt_flags("/mnt/A");
679         ASSERT_GT(old_flags, 0);
680
681         fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
682         ASSERT_GE(fd, 0);
683
684         /*
685          * We're holding a fd open to a mount somwhere in the middle so this
686          * needs to fail somewhere in the middle. After this the mount options
687          * need to be unchanged.
688          */
689         ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
690
691         new_flags = read_mnt_flags("/mnt/A");
692         ASSERT_EQ(new_flags, old_flags);
693
694         ASSERT_EQ(is_shared_mount("/mnt/A"), false);
695
696         new_flags = read_mnt_flags("/mnt/A/AA");
697         ASSERT_EQ(new_flags, old_flags);
698
699         ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false);
700
701         new_flags = read_mnt_flags("/mnt/A/AA/B");
702         ASSERT_EQ(new_flags, old_flags);
703
704         ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false);
705
706         new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
707         ASSERT_EQ(new_flags, old_flags);
708
709         ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false);
710
711         dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC);
712         ASSERT_GE(dfd, 0);
713         EXPECT_EQ(fsync(dfd), 0);
714         EXPECT_EQ(close(dfd), 0);
715
716         EXPECT_EQ(fsync(fd), 0);
717         EXPECT_EQ(close(fd), 0);
718
719         /* All writers are gone so this should succeed. */
720         ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
721 }
722
723 TEST_F(mount_setattr, mixed_mount_options)
724 {
725         unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0;
726         struct mount_attr attr = {
727                 .attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME,
728                 .attr_set = MOUNT_ATTR_RELATIME,
729         };
730
731         if (!mount_setattr_supported())
732                 SKIP(return, "mount_setattr syscall not supported");
733
734         old_flags1 = read_mnt_flags("/mnt/B");
735         ASSERT_GT(old_flags1, 0);
736
737         old_flags2 = read_mnt_flags("/mnt/B/BB");
738         ASSERT_GT(old_flags2, 0);
739
740         ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0);
741
742         expected_flags = old_flags2;
743         expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
744         expected_flags |= MS_RELATIME;
745
746         new_flags = read_mnt_flags("/mnt/B");
747         ASSERT_EQ(new_flags, expected_flags);
748
749         expected_flags = old_flags2;
750         expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
751         expected_flags |= MS_RELATIME;
752
753         new_flags = read_mnt_flags("/mnt/B/BB");
754         ASSERT_EQ(new_flags, expected_flags);
755 }
756
757 TEST_F(mount_setattr, time_changes)
758 {
759         unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
760         struct mount_attr attr = {
761                 .attr_set       = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME,
762         };
763
764         if (!mount_setattr_supported())
765                 SKIP(return, "mount_setattr syscall not supported");
766
767         ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
768
769         attr.attr_set = MOUNT_ATTR_STRICTATIME;
770         ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
771
772         attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
773         ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
774
775         attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
776         attr.attr_clr = MOUNT_ATTR__ATIME;
777         ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
778
779         attr.attr_set = 0;
780         attr.attr_clr = MOUNT_ATTR_STRICTATIME;
781         ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
782
783         attr.attr_clr = MOUNT_ATTR_NOATIME;
784         ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
785
786         old_flags = read_mnt_flags("/mnt/A");
787         ASSERT_GT(old_flags, 0);
788
789         attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME;
790         attr.attr_clr = MOUNT_ATTR__ATIME;
791         ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
792
793         expected_flags = old_flags;
794         expected_flags |= MS_NOATIME;
795         expected_flags |= MS_NODIRATIME;
796
797         new_flags = read_mnt_flags("/mnt/A");
798         ASSERT_EQ(new_flags, expected_flags);
799
800         new_flags = read_mnt_flags("/mnt/A/AA");
801         ASSERT_EQ(new_flags, expected_flags);
802
803         new_flags = read_mnt_flags("/mnt/A/AA/B");
804         ASSERT_EQ(new_flags, expected_flags);
805
806         new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
807         ASSERT_EQ(new_flags, expected_flags);
808
809         memset(&attr, 0, sizeof(attr));
810         attr.attr_set &= ~MOUNT_ATTR_NOATIME;
811         attr.attr_set |= MOUNT_ATTR_RELATIME;
812         attr.attr_clr |= MOUNT_ATTR__ATIME;
813         ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
814
815         expected_flags &= ~MS_NOATIME;
816         expected_flags |= MS_RELATIME;
817
818         new_flags = read_mnt_flags("/mnt/A");
819         ASSERT_EQ(new_flags, expected_flags);
820
821         new_flags = read_mnt_flags("/mnt/A/AA");
822         ASSERT_EQ(new_flags, expected_flags);
823
824         new_flags = read_mnt_flags("/mnt/A/AA/B");
825         ASSERT_EQ(new_flags, expected_flags);
826
827         new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
828         ASSERT_EQ(new_flags, expected_flags);
829
830         memset(&attr, 0, sizeof(attr));
831         attr.attr_set &= ~MOUNT_ATTR_RELATIME;
832         attr.attr_set |= MOUNT_ATTR_STRICTATIME;
833         attr.attr_clr |= MOUNT_ATTR__ATIME;
834         ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
835
836         expected_flags &= ~MS_RELATIME;
837
838         new_flags = read_mnt_flags("/mnt/A");
839         ASSERT_EQ(new_flags, expected_flags);
840
841         new_flags = read_mnt_flags("/mnt/A/AA");
842         ASSERT_EQ(new_flags, expected_flags);
843
844         new_flags = read_mnt_flags("/mnt/A/AA/B");
845         ASSERT_EQ(new_flags, expected_flags);
846
847         new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
848         ASSERT_EQ(new_flags, expected_flags);
849
850         memset(&attr, 0, sizeof(attr));
851         attr.attr_set &= ~MOUNT_ATTR_STRICTATIME;
852         attr.attr_set |= MOUNT_ATTR_NOATIME;
853         attr.attr_clr |= MOUNT_ATTR__ATIME;
854         ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
855
856         expected_flags |= MS_NOATIME;
857         new_flags = read_mnt_flags("/mnt/A");
858         ASSERT_EQ(new_flags, expected_flags);
859
860         new_flags = read_mnt_flags("/mnt/A/AA");
861         ASSERT_EQ(new_flags, expected_flags);
862
863         new_flags = read_mnt_flags("/mnt/A/AA/B");
864         ASSERT_EQ(new_flags, expected_flags);
865
866         new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
867         ASSERT_EQ(new_flags, expected_flags);
868
869         memset(&attr, 0, sizeof(attr));
870         ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
871
872         new_flags = read_mnt_flags("/mnt/A");
873         ASSERT_EQ(new_flags, expected_flags);
874
875         new_flags = read_mnt_flags("/mnt/A/AA");
876         ASSERT_EQ(new_flags, expected_flags);
877
878         new_flags = read_mnt_flags("/mnt/A/AA/B");
879         ASSERT_EQ(new_flags, expected_flags);
880
881         new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
882         ASSERT_EQ(new_flags, expected_flags);
883
884         memset(&attr, 0, sizeof(attr));
885         attr.attr_clr = MOUNT_ATTR_NODIRATIME;
886         ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
887
888         expected_flags &= ~MS_NODIRATIME;
889
890         new_flags = read_mnt_flags("/mnt/A");
891         ASSERT_EQ(new_flags, expected_flags);
892
893         new_flags = read_mnt_flags("/mnt/A/AA");
894         ASSERT_EQ(new_flags, expected_flags);
895
896         new_flags = read_mnt_flags("/mnt/A/AA/B");
897         ASSERT_EQ(new_flags, expected_flags);
898
899         new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
900         ASSERT_EQ(new_flags, expected_flags);
901 }
902
903 TEST_F(mount_setattr, multi_threaded)
904 {
905         int i, j, nthreads, ret = 0;
906         unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
907         pthread_attr_t pattr;
908         pthread_t threads[DEFAULT_THREADS];
909
910         if (!mount_setattr_supported())
911                 SKIP(return, "mount_setattr syscall not supported");
912
913         old_flags = read_mnt_flags("/mnt/A");
914         ASSERT_GT(old_flags, 0);
915
916         /* Try to change mount options from multiple threads. */
917         nthreads = get_nprocs_conf();
918         if (nthreads > DEFAULT_THREADS)
919                 nthreads = DEFAULT_THREADS;
920
921         pthread_attr_init(&pattr);
922         for (i = 0; i < nthreads; i++)
923                 ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0);
924
925         for (j = 0; j < i; j++) {
926                 void *retptr = NULL;
927
928                 EXPECT_EQ(pthread_join(threads[j], &retptr), 0);
929
930                 ret += ptr_to_int(retptr);
931                 EXPECT_EQ(ret, 0);
932         }
933         pthread_attr_destroy(&pattr);
934
935         ASSERT_EQ(ret, 0);
936
937         expected_flags = old_flags;
938         expected_flags |= MS_RDONLY;
939         expected_flags |= MS_NOSUID;
940         new_flags = read_mnt_flags("/mnt/A");
941         ASSERT_EQ(new_flags, expected_flags);
942
943         ASSERT_EQ(is_shared_mount("/mnt/A"), true);
944
945         new_flags = read_mnt_flags("/mnt/A/AA");
946         ASSERT_EQ(new_flags, expected_flags);
947
948         ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
949
950         new_flags = read_mnt_flags("/mnt/A/AA/B");
951         ASSERT_EQ(new_flags, expected_flags);
952
953         ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
954
955         new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
956         ASSERT_EQ(new_flags, expected_flags);
957
958         ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
959 }
960
961 TEST_F(mount_setattr, wrong_user_namespace)
962 {
963         int ret;
964         struct mount_attr attr = {
965                 .attr_set = MOUNT_ATTR_RDONLY,
966         };
967
968         if (!mount_setattr_supported())
969                 SKIP(return, "mount_setattr syscall not supported");
970
971         EXPECT_EQ(create_and_enter_userns(), 0);
972         ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr));
973         ASSERT_LT(ret, 0);
974         ASSERT_EQ(errno, EPERM);
975 }
976
977 TEST_F(mount_setattr, wrong_mount_namespace)
978 {
979         int fd, ret;
980         struct mount_attr attr = {
981                 .attr_set = MOUNT_ATTR_RDONLY,
982         };
983
984         if (!mount_setattr_supported())
985                 SKIP(return, "mount_setattr syscall not supported");
986
987         fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC);
988         ASSERT_GE(fd, 0);
989
990         ASSERT_EQ(unshare(CLONE_NEWNS), 0);
991
992         ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr));
993         ASSERT_LT(ret, 0);
994         ASSERT_EQ(errno, EINVAL);
995 }
996
/*
 * Fixture shared by the mount_setattr_idmapped tests. It declares no fields;
 * the environment the tests rely on is created by FIXTURE_SETUP() and removed
 * by FIXTURE_TEARDOWN() for this fixture.
 */
FIXTURE(mount_setattr_idmapped) {
};
999
/*
 * Build the mount layout used by the mount_setattr_idmapped tests.
 *
 * All work is done after unshare(CLONE_NEWNS) and after marking "/" as
 * MS_REC | MS_PRIVATE. The resulting layout is:
 *   - /tmp, /tmp/B and /tmp/B/BB: tmpfs mounts stacked on each other; the
 *     regular files /tmp/B/b and /tmp/B/BB/b are created (owned by 0:0)
 *     before the next tmpfs is mounted over the directory that holds them
 *   - /mnt and /mnt/A: tmpfs; /mnt/A/AA: recursive bind mount of /tmp
 *   - /mnt/B: ramfs
 *   - /mnt/D: ext4 on a loop device backed by the 2 MiB file /mnt/C/ext4.img
 *
 * The ext4 part shells out to mkfs.ext4 and mount through system().
 */
FIXTURE_SETUP(mount_setattr_idmapped)
{
	int img_fd = -EBADF;

	ASSERT_EQ(unshare(CLONE_NEWNS), 0);

	ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0), 0);

	/* Best effort: the result is ignored, nothing may be mounted there. */
	(void)umount2("/mnt", MNT_DETACH);
	(void)umount2("/tmp", MNT_DETACH);

	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0);
	ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0);

	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0);
	ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0);

	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);

	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);

	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);

	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);

	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
			MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);

	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);

	/*
	 * NOTE(review): the directory created just above is /mnt/B/BB, but
	 * devpts is mounted on /tmp/B/BB. Kept as is because the tests were
	 * written against this layout - confirm whether it is intentional.
	 */
	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
			MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);

	ASSERT_EQ(mkdir("/mnt/C", 0777), 0);
	ASSERT_EQ(mkdir("/mnt/D", 0777), 0);
	img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600);
	ASSERT_GE(img_fd, 0);
	ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0);
	/*
	 * The fd is only needed to size the image. Close it before running
	 * the external tools so that they do not inherit it and so that it is
	 * not left open when one of them fails.
	 */
	ASSERT_EQ(close(img_fd), 0);
	ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0);
	ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0);
}
1059
/*
 * Lazily detach the two trees the tests operate on. Failures are ignored on
 * purpose: teardown is best effort.
 */
FIXTURE_TEARDOWN(mount_setattr_idmapped)
{
	static const char *const targets[] = { "/mnt/A", "/tmp" };
	size_t idx;

	for (idx = 0; idx < sizeof(targets) / sizeof(targets[0]); idx++)
		(void)umount2(targets[idx], MNT_DETACH);
}
1065
1066 /**
1067  * Validate that negative fd values are rejected.
1068  */
1069 TEST_F(mount_setattr_idmapped, invalid_fd_negative)
1070 {
1071         struct mount_attr attr = {
1072                 .attr_set       = MOUNT_ATTR_IDMAP,
1073                 .userns_fd      = -EBADF,
1074         };
1075
1076         if (!mount_setattr_supported())
1077                 SKIP(return, "mount_setattr syscall not supported");
1078
1079         ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1080                 TH_LOG("failure: created idmapped mount with negative fd");
1081         }
1082 }
1083
1084 /**
1085  * Validate that excessively large fd values are rejected.
1086  */
1087 TEST_F(mount_setattr_idmapped, invalid_fd_large)
1088 {
1089         struct mount_attr attr = {
1090                 .attr_set       = MOUNT_ATTR_IDMAP,
1091                 .userns_fd      = INT64_MAX,
1092         };
1093
1094         if (!mount_setattr_supported())
1095                 SKIP(return, "mount_setattr syscall not supported");
1096
1097         ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1098                 TH_LOG("failure: created idmapped mount with too large fd value");
1099         }
1100 }
1101
1102 /**
1103  * Validate that closed fd values are rejected.
1104  */
1105 TEST_F(mount_setattr_idmapped, invalid_fd_closed)
1106 {
1107         int fd;
1108         struct mount_attr attr = {
1109                 .attr_set = MOUNT_ATTR_IDMAP,
1110         };
1111
1112         if (!mount_setattr_supported())
1113                 SKIP(return, "mount_setattr syscall not supported");
1114
1115         fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
1116         ASSERT_GE(fd, 0);
1117         ASSERT_GE(close(fd), 0);
1118
1119         attr.userns_fd = fd;
1120         ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1121                 TH_LOG("failure: created idmapped mount with closed fd");
1122         }
1123 }
1124
1125 /**
1126  * Validate that the initial user namespace is rejected.
1127  */
1128 TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns)
1129 {
1130         int open_tree_fd = -EBADF;
1131         struct mount_attr attr = {
1132                 .attr_set = MOUNT_ATTR_IDMAP,
1133         };
1134
1135         if (!mount_setattr_supported())
1136                 SKIP(return, "mount_setattr syscall not supported");
1137
1138         open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1139                                      AT_NO_AUTOMOUNT |
1140                                      AT_SYMLINK_NOFOLLOW |
1141                                      OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1142         ASSERT_GE(open_tree_fd, 0);
1143
1144         attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC);
1145         ASSERT_GE(attr.userns_fd, 0);
1146         ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1147         ASSERT_EQ(errno, EPERM);
1148         ASSERT_EQ(close(attr.userns_fd), 0);
1149         ASSERT_EQ(close(open_tree_fd), 0);
1150 }
1151
/*
 * Write the mapping "<nsid> <hostid> <range>" to both /proc/<pid>/uid_map and
 * /proc/<pid>/gid_map, in that order.
 *
 * Returns 0 when both writes succeed and -1 as soon as write_file() reports a
 * failure (gid_map is not attempted when uid_map fails).
 */
static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid,
		   unsigned long range)
{
	char procfile[256];
	char map[100];

	/* The same line is used for the uid and the gid mapping. */
	snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range);

	snprintf(procfile, sizeof(procfile), "/proc/%d/uid_map", pid);
	if (write_file(procfile, map, strlen(map)) != 0)
		return -1;

	snprintf(procfile, sizeof(procfile), "/proc/%d/gid_map", pid);
	if (write_file(procfile, map, strlen(map)) != 0)
		return -1;

	return 0;
}
1170
#define __STACK_SIZE (8 * 1024 * 1024)
/*
 * Run fn(arg) in a child created with clone() using flags | SIGCHLD and a
 * freshly allocated stack of __STACK_SIZE bytes.
 *
 * Returns the pid of the child, -ENOMEM when the stack cannot be allocated,
 * or the negative result of clone() with errno describing the failure.
 */
static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
{
	char *stack;
	pid_t pid;
	int saved_errno;

	stack = malloc(__STACK_SIZE);
	if (!stack)
		return -ENOMEM;

#ifdef __ia64__
	pid = __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#else
	/* clone() takes the topmost address of the child's stack area. */
	pid = clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#endif

	/*
	 * Unless the child shares our address space (CLONE_VM) it runs on its
	 * own copy of this buffer, so the parent's copy can be released right
	 * away instead of being leaked. The same holds when no child was
	 * created at all. errno is preserved for callers that inspect it.
	 */
	if (pid < 0 || !(flags & CLONE_VM)) {
		saved_errno = errno;
		free(stack);
		errno = saved_errno;
	}

	return pid;
}
1186
/*
 * Entry point of the helper child started by get_userns_fd(): the process
 * sends SIGSTOP to itself and returns the result of that kill() call.
 * The argument is not used.
 */
static int get_userns_fd_cb(void *data)
{
	pid_t self = getpid();

	(void)data;

	return kill(self, SIGSTOP);
}
1191
/*
 * Wait for the child @pid to terminate, retrying when waitpid() is
 * interrupted by a signal.
 *
 * Returns the exit status of the child if it exited normally, and -1 if
 * waitpid() failed for a reason other than EINTR or if the child did not
 * terminate through a normal exit.
 */
static int wait_for_pid(pid_t pid)
{
	int status;

	while (waitpid(pid, &status, 0) == -1) {
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(status) ? WEXITSTATUS(status) : -1;
}
1210
1211 static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
1212 {
1213         int ret;
1214         pid_t pid;
1215         char path[256];
1216
1217         pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER);
1218         if (pid < 0)
1219                 return -errno;
1220
1221         ret = map_ids(pid, nsid, hostid, range);
1222         if (ret < 0)
1223                 return ret;
1224
1225         snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
1226         ret = open(path, O_RDONLY | O_CLOEXEC);
1227         kill(pid, SIGKILL);
1228         wait_for_pid(pid);
1229         return ret;
1230 }
1231
1232 /**
1233  * Validate that an attached mount in our mount namespace cannot be idmapped.
1234  * (The kernel enforces that the mount's mount namespace and the caller's mount
1235  *  namespace match.)
1236  */
1237 TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
1238 {
1239         int open_tree_fd = -EBADF;
1240         struct mount_attr attr = {
1241                 .attr_set = MOUNT_ATTR_IDMAP,
1242         };
1243
1244         if (!mount_setattr_supported())
1245                 SKIP(return, "mount_setattr syscall not supported");
1246
1247         open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1248                                      AT_EMPTY_PATH |
1249                                      AT_NO_AUTOMOUNT |
1250                                      AT_SYMLINK_NOFOLLOW |
1251                                      OPEN_TREE_CLOEXEC);
1252         ASSERT_GE(open_tree_fd, 0);
1253
1254         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
1255         ASSERT_GE(attr.userns_fd, 0);
1256         ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1257         ASSERT_EQ(close(attr.userns_fd), 0);
1258         ASSERT_EQ(close(open_tree_fd), 0);
1259 }
1260
1261 /**
1262  * Validate that idmapping a mount is rejected if the mount's mount namespace
1263  * and our mount namespace don't match.
1264  * (The kernel enforces that the mount's mount namespace and the caller's mount
1265  *  namespace match.)
1266  */
1267 TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace)
1268 {
1269         int open_tree_fd = -EBADF;
1270         struct mount_attr attr = {
1271                 .attr_set = MOUNT_ATTR_IDMAP,
1272         };
1273
1274         if (!mount_setattr_supported())
1275                 SKIP(return, "mount_setattr syscall not supported");
1276
1277         open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1278                                      AT_EMPTY_PATH |
1279                                      AT_NO_AUTOMOUNT |
1280                                      AT_SYMLINK_NOFOLLOW |
1281                                      OPEN_TREE_CLOEXEC);
1282         ASSERT_GE(open_tree_fd, 0);
1283
1284         ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1285
1286         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
1287         ASSERT_GE(attr.userns_fd, 0);
1288         ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
1289                                     sizeof(attr)), 0);
1290         ASSERT_EQ(close(attr.userns_fd), 0);
1291         ASSERT_EQ(close(open_tree_fd), 0);
1292 }
1293
1294 /**
1295  * Validate that an attached mount in our mount namespace can be idmapped.
1296  */
1297 TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace)
1298 {
1299         int open_tree_fd = -EBADF;
1300         struct mount_attr attr = {
1301                 .attr_set = MOUNT_ATTR_IDMAP,
1302         };
1303
1304         if (!mount_setattr_supported())
1305                 SKIP(return, "mount_setattr syscall not supported");
1306
1307         open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1308                                      AT_EMPTY_PATH |
1309                                      AT_NO_AUTOMOUNT |
1310                                      AT_SYMLINK_NOFOLLOW |
1311                                      OPEN_TREE_CLOEXEC |
1312                                      OPEN_TREE_CLONE);
1313         ASSERT_GE(open_tree_fd, 0);
1314
1315         /* Changing mount properties on a detached mount. */
1316         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
1317         ASSERT_GE(attr.userns_fd, 0);
1318         ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1319                                     AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1320         ASSERT_EQ(close(attr.userns_fd), 0);
1321         ASSERT_EQ(close(open_tree_fd), 0);
1322 }
1323
1324 /**
1325  * Validate that a detached mount not in our mount namespace can be idmapped.
1326  */
1327 TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace)
1328 {
1329         int open_tree_fd = -EBADF;
1330         struct mount_attr attr = {
1331                 .attr_set = MOUNT_ATTR_IDMAP,
1332         };
1333
1334         if (!mount_setattr_supported())
1335                 SKIP(return, "mount_setattr syscall not supported");
1336
1337         open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1338                                      AT_EMPTY_PATH |
1339                                      AT_NO_AUTOMOUNT |
1340                                      AT_SYMLINK_NOFOLLOW |
1341                                      OPEN_TREE_CLOEXEC |
1342                                      OPEN_TREE_CLONE);
1343         ASSERT_GE(open_tree_fd, 0);
1344
1345         ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1346
1347         /* Changing mount properties on a detached mount. */
1348         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
1349         ASSERT_GE(attr.userns_fd, 0);
1350         ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1351                                     AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1352         ASSERT_EQ(close(attr.userns_fd), 0);
1353         ASSERT_EQ(close(open_tree_fd), 0);
1354 }
1355
1356 /**
1357  * Validate that currently changing the idmapping of an idmapped mount fails.
1358  */
1359 TEST_F(mount_setattr_idmapped, change_idmapping)
1360 {
1361         int open_tree_fd = -EBADF;
1362         struct mount_attr attr = {
1363                 .attr_set = MOUNT_ATTR_IDMAP,
1364         };
1365
1366         if (!mount_setattr_supported())
1367                 SKIP(return, "mount_setattr syscall not supported");
1368
1369         open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1370                                      AT_EMPTY_PATH |
1371                                      AT_NO_AUTOMOUNT |
1372                                      AT_SYMLINK_NOFOLLOW |
1373                                      OPEN_TREE_CLOEXEC |
1374                                      OPEN_TREE_CLONE);
1375         ASSERT_GE(open_tree_fd, 0);
1376
1377         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
1378         ASSERT_GE(attr.userns_fd, 0);
1379         ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1380                                     AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1381         ASSERT_EQ(close(attr.userns_fd), 0);
1382
1383         /* Change idmapping on a detached mount that is already idmapped. */
1384         attr.userns_fd  = get_userns_fd(0, 20000, 10000);
1385         ASSERT_GE(attr.userns_fd, 0);
1386         ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1387         ASSERT_EQ(close(attr.userns_fd), 0);
1388         ASSERT_EQ(close(open_tree_fd), 0);
1389 }
1390
/*
 * Check the ownership of @path, resolved relative to @dfd with fstatat()
 * using @flags.
 *
 * Returns true only when the file can be stat'ed and both its uid and gid
 * equal @expected_uid and @expected_gid; a failing fstatat() yields false.
 */
static bool expected_uid_gid(int dfd, const char *path, int flags,
			     uid_t expected_uid, gid_t expected_gid)
{
	struct stat st;

	if (fstatat(dfd, path, &st, flags) < 0)
		return false;

	if (st.st_uid != expected_uid)
		return false;

	return st.st_gid == expected_gid;
}
1403
1404 TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
1405 {
1406         int open_tree_fd = -EBADF;
1407         struct mount_attr attr = {
1408                 .attr_set = MOUNT_ATTR_IDMAP,
1409         };
1410
1411         if (!mount_setattr_supported())
1412                 SKIP(return, "mount_setattr syscall not supported");
1413
1414         ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1415         ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1416
1417         open_tree_fd = sys_open_tree(-EBADF, "/mnt/A",
1418                                      AT_RECURSIVE |
1419                                      AT_EMPTY_PATH |
1420                                      AT_NO_AUTOMOUNT |
1421                                      AT_SYMLINK_NOFOLLOW |
1422                                      OPEN_TREE_CLOEXEC |
1423                                      OPEN_TREE_CLONE);
1424         ASSERT_GE(open_tree_fd, 0);
1425
1426         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
1427         ASSERT_GE(attr.userns_fd, 0);
1428         ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1429         ASSERT_EQ(close(attr.userns_fd), 0);
1430         ASSERT_EQ(close(open_tree_fd), 0);
1431
1432         ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1433         ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1434         ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0);
1435         ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
1436 }
1437
1438 TEST_F(mount_setattr, mount_attr_nosymfollow)
1439 {
1440         int fd;
1441         unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
1442         struct mount_attr attr = {
1443                 .attr_set       = MOUNT_ATTR_NOSYMFOLLOW,
1444         };
1445
1446         if (!mount_setattr_supported())
1447                 SKIP(return, "mount_setattr syscall not supported");
1448
1449         fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1450         ASSERT_GT(fd, 0);
1451         ASSERT_EQ(close(fd), 0);
1452
1453         old_flags = read_mnt_flags("/mnt/A");
1454         ASSERT_GT(old_flags, 0);
1455
1456         ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1457
1458         expected_flags = old_flags;
1459         expected_flags |= ST_NOSYMFOLLOW;
1460
1461         new_flags = read_mnt_flags("/mnt/A");
1462         ASSERT_EQ(new_flags, expected_flags);
1463
1464         new_flags = read_mnt_flags("/mnt/A/AA");
1465         ASSERT_EQ(new_flags, expected_flags);
1466
1467         new_flags = read_mnt_flags("/mnt/A/AA/B");
1468         ASSERT_EQ(new_flags, expected_flags);
1469
1470         new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1471         ASSERT_EQ(new_flags, expected_flags);
1472
1473         fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1474         ASSERT_LT(fd, 0);
1475         ASSERT_EQ(errno, ELOOP);
1476
1477         attr.attr_set &= ~MOUNT_ATTR_NOSYMFOLLOW;
1478         attr.attr_clr |= MOUNT_ATTR_NOSYMFOLLOW;
1479
1480         ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1481
1482         expected_flags &= ~ST_NOSYMFOLLOW;
1483         new_flags = read_mnt_flags("/mnt/A");
1484         ASSERT_EQ(new_flags, expected_flags);
1485
1486         new_flags = read_mnt_flags("/mnt/A/AA");
1487         ASSERT_EQ(new_flags, expected_flags);
1488
1489         new_flags = read_mnt_flags("/mnt/A/AA/B");
1490         ASSERT_EQ(new_flags, expected_flags);
1491
1492         new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1493         ASSERT_EQ(new_flags, expected_flags);
1494
1495         fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1496         ASSERT_GT(fd, 0);
1497         ASSERT_EQ(close(fd), 0);
1498 }
1499
/* Harness macro from kselftest_harness.h; presumably supplies main() for the tests above - TODO confirm. */
TEST_HARNESS_MAIN