1 # SOME DESCRIPTIVE TITLE
2 # Copyright (C) YEAR Free Software Foundation, Inc.
3 # This file is distributed under the same license as the PACKAGE package.
4 # FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
9 "Project-Id-Version: PACKAGE VERSION\n"
10 "POT-Creation-Date: 2015-01-21 20:35+0900\n"
11 "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
12 "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
13 "Language-Team: LANGUAGE <LL@li.org>\n"
16 "Content-Type: text/plain; charset=UTF-8\n"
17 "Content-Transfer-Encoding: 8bit\n"
20 #: build/C/man2/acct.2:31 build/C/man5/acct.5:25
26 #: build/C/man2/acct.2:31
32 #: build/C/man2/acct.2:31 build/C/man5/acct.5:25 build/C/man7/capabilities.7:48 build/C/man2/capget.2:15 build/C/man7/cpuset.7:25 build/C/man7/credentials.7:27 build/C/man2/getgid.2:25 build/C/man2/getgroups.2:31 build/C/man2/getpid.2:25 build/C/man2/getpriority.2:45 build/C/man2/getresuid.2:28 build/C/man2/getrlimit.2:64 build/C/man2/getrusage.2:39 build/C/man2/getsid.2:26 build/C/man2/getuid.2:26 build/C/man2/iopl.2:33 build/C/man2/ioprio_set.2:24 build/C/man2/ipc.2:25 build/C/man7/namespaces.7:27 build/C/man7/pid_namespaces.7:27 build/C/man2/seteuid.2:29 build/C/man2/setfsgid.2:31 build/C/man2/setfsuid.2:31 build/C/man2/setgid.2:29 build/C/man2/setpgid.2:48 build/C/man2/setresuid.2:26 build/C/man2/setreuid.2:45 build/C/man2/setsid.2:31 build/C/man2/setuid.2:30 build/C/man7/svipc.7:40 build/C/man3/ulimit.3:27 build/C/man7/user_namespaces.7:27 build/C/man2/seccomp.2:27
38 #: build/C/man2/acct.2:31 build/C/man5/acct.5:25 build/C/man7/capabilities.7:48 build/C/man2/capget.2:15 build/C/man7/cpuset.7:25 build/C/man7/credentials.7:27 build/C/man2/getgid.2:25 build/C/man2/getgroups.2:31 build/C/man2/getpid.2:25 build/C/man2/getpriority.2:45 build/C/man2/getresuid.2:28 build/C/man2/getrlimit.2:64 build/C/man2/getrusage.2:39 build/C/man2/getsid.2:26 build/C/man2/getuid.2:26 build/C/man3/group_member.3:25 build/C/man2/iopl.2:33 build/C/man2/ioprio_set.2:24 build/C/man2/ipc.2:25 build/C/man7/namespaces.7:27 build/C/man7/pid_namespaces.7:27 build/C/man2/seteuid.2:29 build/C/man2/setfsgid.2:31 build/C/man2/setfsuid.2:31 build/C/man2/setgid.2:29 build/C/man2/setpgid.2:48 build/C/man2/setresuid.2:26 build/C/man2/setreuid.2:45 build/C/man2/setsid.2:31 build/C/man2/setuid.2:30 build/C/man7/svipc.7:40 build/C/man3/ulimit.3:27 build/C/man7/user_namespaces.7:27 build/C/man2/seccomp.2:27
40 msgid "Linux Programmer's Manual"
44 #: build/C/man2/acct.2:32 build/C/man5/acct.5:26 build/C/man7/capabilities.7:49 build/C/man2/capget.2:16 build/C/man7/cpuset.7:26 build/C/man7/credentials.7:28 build/C/man2/getgid.2:26 build/C/man2/getgroups.2:32 build/C/man2/getpid.2:26 build/C/man2/getpriority.2:46 build/C/man2/getresuid.2:29 build/C/man2/getrlimit.2:65 build/C/man2/getrusage.2:40 build/C/man2/getsid.2:27 build/C/man2/getuid.2:27 build/C/man3/group_member.3:26 build/C/man2/iopl.2:34 build/C/man2/ioprio_set.2:25 build/C/man2/ipc.2:26 build/C/man7/namespaces.7:28 build/C/man7/pid_namespaces.7:28 build/C/man2/seteuid.2:30 build/C/man2/setfsgid.2:32 build/C/man2/setfsuid.2:32 build/C/man2/setgid.2:30 build/C/man2/setpgid.2:49 build/C/man2/setresuid.2:27 build/C/man2/setreuid.2:46 build/C/man2/setsid.2:32 build/C/man2/setuid.2:31 build/C/man7/svipc.7:41 build/C/man3/ulimit.3:28 build/C/man7/user_namespaces.7:28 build/C/man2/seccomp.2:28
50 #: build/C/man2/acct.2:34
51 msgid "acct - switch process accounting on or off"
55 #: build/C/man2/acct.2:34 build/C/man5/acct.5:28 build/C/man2/capget.2:18 build/C/man2/getgid.2:28 build/C/man2/getgroups.2:34 build/C/man2/getpid.2:28 build/C/man2/getpriority.2:48 build/C/man2/getresuid.2:31 build/C/man2/getrlimit.2:67 build/C/man2/getrusage.2:42 build/C/man2/getsid.2:29 build/C/man2/getuid.2:29 build/C/man3/group_member.3:28 build/C/man2/iopl.2:36 build/C/man2/ioprio_set.2:27 build/C/man2/ipc.2:28 build/C/man2/seteuid.2:32 build/C/man2/setfsgid.2:34 build/C/man2/setfsuid.2:34 build/C/man2/setgid.2:32 build/C/man2/setpgid.2:51 build/C/man2/setresuid.2:29 build/C/man2/setreuid.2:48 build/C/man2/setsid.2:34 build/C/man2/setuid.2:33 build/C/man7/svipc.7:43 build/C/man3/ulimit.3:30 build/C/man2/seccomp.2:30
61 #: build/C/man2/acct.2:38
63 msgid "B<#include E<lt>unistd.hE<gt>>\n"
67 #: build/C/man2/acct.2:40
69 msgid "B<int acct(const char *>I<filename>B<);>\n"
73 #: build/C/man2/acct.2:46 build/C/man2/getgroups.2:48 build/C/man2/getrlimit.2:84 build/C/man2/getsid.2:37 build/C/man3/group_member.3:36 build/C/man2/seteuid.2:44 build/C/man2/setpgid.2:71 build/C/man2/setreuid.2:60
74 msgid "Feature Test Macro Requirements for glibc (see B<feature_test_macros>(7)):"
78 #: build/C/man2/acct.2:50
79 msgid "B<acct>(): _BSD_SOURCE || (_XOPEN_SOURCE && _XOPEN_SOURCE\\ E<lt>\\ 500)"
83 #: build/C/man2/acct.2:50 build/C/man5/acct.5:30 build/C/man7/capabilities.7:51 build/C/man2/capget.2:24 build/C/man7/cpuset.7:28 build/C/man7/credentials.7:30 build/C/man2/getgid.2:36 build/C/man2/getgroups.2:52 build/C/man2/getpid.2:36 build/C/man2/getpriority.2:56 build/C/man2/getresuid.2:39 build/C/man2/getrlimit.2:88 build/C/man2/getrusage.2:48 build/C/man2/getsid.2:50 build/C/man2/getuid.2:37 build/C/man3/group_member.3:40 build/C/man2/iopl.2:40 build/C/man2/ioprio_set.2:35 build/C/man2/ipc.2:34 build/C/man7/namespaces.7:30 build/C/man7/pid_namespaces.7:30 build/C/man2/seteuid.2:53 build/C/man2/setfsgid.2:38 build/C/man2/setfsuid.2:38 build/C/man2/setgid.2:38 build/C/man2/setpgid.2:100 build/C/man2/setresuid.2:37 build/C/man2/setreuid.2:70 build/C/man2/setsid.2:41 build/C/man2/setuid.2:39 build/C/man7/svipc.7:49 build/C/man3/ulimit.3:34 build/C/man7/user_namespaces.7:30 build/C/man2/seccomp.2:43
89 #: build/C/man2/acct.2:60
91 "The B<acct>() system call enables or disables process accounting. If "
92 "called with the name of an existing file as its argument, accounting is "
93 "turned on, and records for each terminating process are appended to "
94 "I<filename> as it terminates. An argument of NULL causes accounting to be "
99 #: build/C/man2/acct.2:60 build/C/man2/capget.2:160 build/C/man2/getgroups.2:92 build/C/man2/getpriority.2:104 build/C/man2/getresuid.2:50 build/C/man2/getrlimit.2:461 build/C/man2/getrusage.2:188 build/C/man2/getsid.2:58 build/C/man3/group_member.3:48 build/C/man2/iopl.2:66 build/C/man2/ioprio_set.2:149 build/C/man2/seteuid.2:67 build/C/man2/setfsgid.2:68 build/C/man2/setfsuid.2:68 build/C/man2/setgid.2:53 build/C/man2/setpgid.2:195 build/C/man2/setresuid.2:64 build/C/man2/setreuid.2:93 build/C/man2/setsid.2:54 build/C/man2/setuid.2:70 build/C/man3/ulimit.3:67 build/C/man2/seccomp.2:342
105 #: build/C/man2/acct.2:65 build/C/man2/capget.2:165 build/C/man2/getresuid.2:55 build/C/man2/getrusage.2:193 build/C/man2/iopl.2:71 build/C/man2/seteuid.2:72 build/C/man2/setgid.2:58 build/C/man2/setresuid.2:69 build/C/man2/setreuid.2:98 build/C/man2/setuid.2:75
107 "On success, zero is returned. On error, -1 is returned, and I<errno> is set "
112 #: build/C/man2/acct.2:65 build/C/man2/capget.2:179 build/C/man7/cpuset.7:1100 build/C/man2/getgid.2:42 build/C/man2/getgroups.2:106 build/C/man2/getpid.2:44 build/C/man2/getpriority.2:117 build/C/man2/getresuid.2:55 build/C/man2/getrlimit.2:466 build/C/man2/getrusage.2:193 build/C/man2/getsid.2:63 build/C/man2/getuid.2:43 build/C/man2/iopl.2:71 build/C/man2/ioprio_set.2:169 build/C/man2/seteuid.2:79 build/C/man2/setgid.2:58 build/C/man2/setpgid.2:216 build/C/man2/setresuid.2:76 build/C/man2/setreuid.2:105 build/C/man2/setsid.2:61 build/C/man2/setuid.2:82 build/C/man3/ulimit.3:74 build/C/man2/seccomp.2:358
118 #: build/C/man2/acct.2:66 build/C/man7/cpuset.7:1116 build/C/man7/cpuset.7:1123 build/C/man7/cpuset.7:1129 build/C/man7/cpuset.7:1137 build/C/man7/cpuset.7:1144 build/C/man2/getpriority.2:137 build/C/man2/setpgid.2:217
124 #: build/C/man2/acct.2:77
126 "Write permission is denied for the specified file, or search permission is "
127 "denied for one of the directories in the path prefix of I<filename> (see "
128 "also B<path_resolution>(7)), or I<filename> is not a regular file."
132 #: build/C/man2/acct.2:77 build/C/man2/capget.2:180 build/C/man7/cpuset.7:1172 build/C/man2/getgroups.2:107 build/C/man2/getresuid.2:56 build/C/man2/getrlimit.2:467 build/C/man2/getrusage.2:194 build/C/man2/seccomp.2:369
138 #: build/C/man2/acct.2:81
139 msgid "I<filename> points outside your accessible address space."
143 #: build/C/man2/acct.2:81 build/C/man7/cpuset.7:1238 build/C/man7/cpuset.7:1246
149 #: build/C/man2/acct.2:85
150 msgid "Error writing to the file I<filename>."
154 #: build/C/man2/acct.2:85
160 #: build/C/man2/acct.2:89
161 msgid "I<filename> is a directory."
165 #: build/C/man2/acct.2:89
171 #: build/C/man2/acct.2:93
172 msgid "Too many symbolic links were encountered in resolving I<filename>."
176 #: build/C/man2/acct.2:93 build/C/man7/cpuset.7:1251 build/C/man7/cpuset.7:1258 build/C/man7/cpuset.7:1263
178 msgid "B<ENAMETOOLONG>"
182 #: build/C/man2/acct.2:97
183 msgid "I<filename> was too long."
187 #: build/C/man2/acct.2:97
193 #: build/C/man2/acct.2:100
194 msgid "The system limit on the total number of open files has been reached."
198 #: build/C/man2/acct.2:100 build/C/man7/cpuset.7:1275 build/C/man7/cpuset.7:1280
204 #: build/C/man2/acct.2:103
205 msgid "The specified filename does not exist."
209 #: build/C/man2/acct.2:103 build/C/man7/cpuset.7:1287 build/C/man2/getgroups.2:127 build/C/man2/seccomp.2:413 build/C/man2/seccomp.2:416
215 #: build/C/man2/acct.2:106 build/C/man2/getgroups.2:130 build/C/man2/seccomp.2:416
216 msgid "Out of memory."
220 #: build/C/man2/acct.2:106 build/C/man2/iopl.2:76
226 #: build/C/man2/acct.2:112
228 "BSD process accounting has not been enabled when the operating system kernel "
229 "was compiled. The kernel configuration parameter controlling this feature "
230 "is B<CONFIG_BSD_PROCESS_ACCT>."
234 #: build/C/man2/acct.2:112 build/C/man7/cpuset.7:1314
240 #: build/C/man2/acct.2:117
241 msgid "A component used as a directory in I<filename> is not in fact a directory."
245 #: build/C/man2/acct.2:117 build/C/man2/capget.2:191 build/C/man2/capget.2:196 build/C/man7/cpuset.7:1319 build/C/man2/getgroups.2:130 build/C/man2/getpriority.2:149 build/C/man2/getrlimit.2:483 build/C/man2/getrlimit.2:488 build/C/man2/getrlimit.2:496 build/C/man2/getsid.2:64 build/C/man2/iopl.2:79 build/C/man2/ioprio_set.2:179 build/C/man2/seteuid.2:83 build/C/man2/setgid.2:64 build/C/man2/setpgid.2:231 build/C/man2/setresuid.2:103 build/C/man2/setreuid.2:132 build/C/man2/setsid.2:62 build/C/man2/setuid.2:110 build/C/man3/ulimit.3:75
251 #: build/C/man2/acct.2:123
253 "The calling process has insufficient privilege to enable process "
254 "accounting. On Linux the B<CAP_SYS_PACCT> capability is required."
258 #: build/C/man2/acct.2:123
264 #: build/C/man2/acct.2:127
265 msgid "I<filename> refers to a file on a read-only filesystem."
269 #: build/C/man2/acct.2:127
275 #: build/C/man2/acct.2:130
276 msgid "There are no more free file structures or we ran out of memory."
280 #: build/C/man2/acct.2:130 build/C/man5/acct.5:153 build/C/man7/capabilities.7:1120 build/C/man2/capget.2:218 build/C/man7/credentials.7:287 build/C/man2/getgid.2:44 build/C/man2/getgroups.2:133 build/C/man2/getpid.2:46 build/C/man2/getpriority.2:157 build/C/man2/getresuid.2:67 build/C/man2/getrlimit.2:511 build/C/man2/getrusage.2:202 build/C/man2/getsid.2:79 build/C/man2/getuid.2:45 build/C/man3/group_member.3:55 build/C/man2/iopl.2:87 build/C/man2/ioprio_set.2:196 build/C/man2/ipc.2:45 build/C/man7/namespaces.7:359 build/C/man7/pid_namespaces.7:351 build/C/man2/seteuid.2:99 build/C/man2/setfsgid.2:75 build/C/man2/setfsuid.2:75 build/C/man2/setgid.2:71 build/C/man2/setpgid.2:250 build/C/man2/setresuid.2:109 build/C/man2/setreuid.2:148 build/C/man2/setsid.2:68 build/C/man2/setuid.2:117 build/C/man3/ulimit.3:78 build/C/man7/user_namespaces.7:645 build/C/man2/seccomp.2:435
282 msgid "CONFORMING TO"
285 #. SVr4 documents an EBUSY error condition, but no EISDIR or ENOSYS.
286 #. Also AIX and HP-UX document EBUSY (attempt is made
287 #. to enable accounting when it is already enabled), as does Solaris
288 #. (attempt is made to enable accounting using the same file that is
289 #. currently being used).
291 #: build/C/man2/acct.2:137
292 msgid "SVr4, 4.3BSD (but not POSIX)."
296 #: build/C/man2/acct.2:137 build/C/man5/acct.5:157 build/C/man7/capabilities.7:1126 build/C/man2/capget.2:220 build/C/man7/cpuset.7:1341 build/C/man7/credentials.7:293 build/C/man2/getgid.2:46 build/C/man2/getgroups.2:141 build/C/man2/getpid.2:48 build/C/man2/getpriority.2:160 build/C/man2/getresuid.2:70 build/C/man2/getrlimit.2:534 build/C/man2/getrusage.2:213 build/C/man2/getsid.2:81 build/C/man2/getuid.2:47 build/C/man2/iopl.2:91 build/C/man2/ioprio_set.2:198 build/C/man2/ipc.2:49 build/C/man2/seteuid.2:101 build/C/man2/setfsgid.2:79 build/C/man2/setfsuid.2:79 build/C/man2/setgid.2:73 build/C/man2/setpgid.2:272 build/C/man2/setresuid.2:112 build/C/man2/setreuid.2:154 build/C/man2/setsid.2:70 build/C/man2/setuid.2:122 build/C/man7/user_namespaces.7:648 build/C/man2/seccomp.2:439
302 #: build/C/man2/acct.2:140
304 "No accounting is produced for programs running when a system crash occurs. "
305 "In particular, nonterminating processes are never accounted for."
309 #: build/C/man2/acct.2:143
311 "The structure of the records written to the accounting file is described in "
316 #: build/C/man2/acct.2:143 build/C/man5/acct.5:174 build/C/man7/capabilities.7:1183 build/C/man2/capget.2:228 build/C/man7/cpuset.7:1488 build/C/man7/credentials.7:304 build/C/man2/getgid.2:62 build/C/man2/getgroups.2:178 build/C/man2/getpid.2:100 build/C/man2/getpriority.2:232 build/C/man2/getresuid.2:86 build/C/man2/getrlimit.2:766 build/C/man2/getrusage.2:253 build/C/man2/getsid.2:84 build/C/man2/getuid.2:73 build/C/man3/group_member.3:57 build/C/man2/iopl.2:100 build/C/man2/ioprio_set.2:346 build/C/man2/ipc.2:57 build/C/man7/namespaces.7:364 build/C/man7/pid_namespaces.7:356 build/C/man2/seteuid.2:141 build/C/man2/setfsgid.2:123 build/C/man2/setfsuid.2:131 build/C/man2/setgid.2:83 build/C/man2/setpgid.2:340 build/C/man2/setresuid.2:132 build/C/man2/setreuid.2:194 build/C/man2/setsid.2:93 build/C/man2/setuid.2:145 build/C/man7/svipc.7:335 build/C/man3/ulimit.3:83 build/C/man7/user_namespaces.7:1011 build/C/man2/seccomp.2:662
322 #: build/C/man2/acct.2:144
327 #: build/C/man5/acct.5:25
333 #: build/C/man5/acct.5:28
334 msgid "acct - process accounting file"
338 #: build/C/man5/acct.5:30
339 msgid "B<#include E<lt>sys/acct.hE<gt>>"
343 #: build/C/man5/acct.5:36
345 "If the kernel is built with the process accounting option enabled "
346 "(B<CONFIG_BSD_PROCESS_ACCT>), then calling B<acct>(2) starts process "
347 "accounting, for example:"
351 #: build/C/man5/acct.5:39
352 msgid "acct(\"/var/log/pacct\");"
356 #: build/C/man5/acct.5:47
358 "When process accounting is enabled, the kernel writes a record to the "
359 "accounting file as each process on the system terminates. This record "
360 "contains information about the terminated process, and is defined in "
361 "I<E<lt>sys/acct.hE<gt>> as follows:"
365 #: build/C/man5/acct.5:51
367 msgid "#define ACCT_COMM 16\n"
371 #: build/C/man5/acct.5:53
373 msgid "typedef u_int16_t comp_t;\n"
377 #: build/C/man5/acct.5:77
381 " char ac_flag; /* Accounting flags */\n"
382 " u_int16_t ac_uid; /* Accounting user ID */\n"
383 " u_int16_t ac_gid; /* Accounting group ID */\n"
384 " u_int16_t ac_tty; /* Controlling terminal */\n"
385 " u_int32_t ac_btime; /* Process creation time\n"
386 " (seconds since the Epoch) */\n"
387 " comp_t ac_utime; /* User CPU time */\n"
388 " comp_t ac_stime; /* System CPU time */\n"
389 " comp_t ac_etime; /* Elapsed time */\n"
390 " comp_t ac_mem; /* Average memory usage (kB) */\n"
391 " comp_t ac_io; /* Characters transferred (unused) */\n"
392 " comp_t ac_rw; /* Blocks read or written (unused) */\n"
393 " comp_t ac_minflt; /* Minor page faults */\n"
394 " comp_t ac_majflt; /* Major page faults */\n"
395 " comp_t ac_swaps; /* Number of swaps (unused) */\n"
396 " u_int32_t ac_exitcode; /* Process termination status\n"
397 " (see wait(2)) */\n"
398 " char ac_comm[ACCT_COMM+1];\n"
399 " /* Command name (basename of last\n"
400 " executed command; null-terminated) */\n"
401 " char ac_pad[I<X>]; /* padding bytes */\n"
406 #: build/C/man5/acct.5:84
409 "enum { /* Bits that may be set in ac_flag field */\n"
410 " AFORK = 0x01, /* Has executed fork, but no exec */\n"
411 " ASU = 0x02, /* Used superuser privileges */\n"
412 " ACORE = 0x08, /* Dumped core */\n"
413 " AXSIG = 0x10 /* Killed by a signal */\n"
418 #: build/C/man5/acct.5:94
420 "The I<comp_t> data type is a floating-point value consisting of a 3-bit, "
421 "base-8 exponent, and a 13-bit mantissa. A value, I<c>, of this type can be "
422 "converted to a (long) integer as follows:"
426 #: build/C/man5/acct.5:97
428 msgid " v = (c & 0x1fff) E<lt>E<lt> (((c E<gt>E<gt> 13) & 0x7) * 3);\n"
432 #: build/C/man5/acct.5:107
434 "The I<ac_utime>, I<ac_stime>, and I<ac_etime> fields measure time in \"clock "
435 "ticks\"; divide these values by I<sysconf(_SC_CLK_TCK)> to convert them to "
440 #: build/C/man5/acct.5:107
442 msgid "Version 3 accounting file format"
446 #: build/C/man5/acct.5:122
448 "Since kernel 2.6.8, an optional alternative version of the accounting file "
449 "can be produced if the B<CONFIG_BSD_PROCESS_ACCT_V3> option is set when "
450 "building the kernel. With this option set, the records written to the "
451 "accounting file contain additional fields, and the width of I<ac_uid> and "
452 "I<ac_gid> fields is widened from 16 to 32 bits (in line with the increased "
453 "size of UIDs and GIDs in Linux 2.4 and later). The records are defined as "
458 #: build/C/man5/acct.5:147
462 " char ac_flag; /* Flags */\n"
463 " char ac_version; /* Always set to ACCT_VERSION (3) */\n"
464 " u_int16_t ac_tty; /* Controlling terminal */\n"
465 " u_int32_t ac_exitcode; /* Process termination status */\n"
466 " u_int32_t ac_uid; /* Real user ID */\n"
467 " u_int32_t ac_gid; /* Real group ID */\n"
468 " u_int32_t ac_pid; /* Process ID */\n"
469 " u_int32_t ac_ppid; /* Parent process ID */\n"
470 " u_int32_t ac_btime; /* Process creation time */\n"
471 " float ac_etime; /* Elapsed time */\n"
472 " comp_t ac_utime; /* User CPU time */\n"
473 " comp_t ac_stime; /* System time */\n"
474 " comp_t ac_mem; /* Average memory usage (kB) */\n"
475 " comp_t ac_io; /* Characters transferred (unused) */\n"
476 " comp_t ac_rw; /* Blocks read or written\n"
478 " comp_t ac_minflt; /* Minor page faults */\n"
479 " comp_t ac_majflt; /* Major page faults */\n"
480 " comp_t ac_swaps; /* Number of swaps (unused) */\n"
481 " char ac_comm[ACCT_COMM]; /* Command name */\n"
486 #: build/C/man5/acct.5:149 build/C/man7/cpuset.7:1338 build/C/man2/getresuid.2:60 build/C/man2/getrlimit.2:506 build/C/man2/getsid.2:75 build/C/man2/ioprio_set.2:193 build/C/man2/setfsgid.2:71 build/C/man2/setfsuid.2:71 build/C/man2/setresuid.2:107 build/C/man2/seccomp.2:430
492 #: build/C/man5/acct.5:153
493 msgid "The I<acct_v3> structure is defined in glibc since version 2.6."
497 #: build/C/man5/acct.5:157
499 "Process accounting originated on BSD. Although it is present on most "
500 "systems, it is not standardized, and the details vary somewhat between "
505 #: build/C/man5/acct.5:160
507 "Records in the accounting file are ordered by termination time of the "
512 #: build/C/man5/acct.5:167
514 "In kernels up to and including 2.6.9, a separate accounting record is "
515 "written for each thread created using the NPTL threading library; since "
516 "Linux 2.6.10, a single accounting record is written for the entire process "
517 "on termination of the last thread in the process."
521 #: build/C/man5/acct.5:174
523 "The I</proc/sys/kernel/acct> file, described in B<proc>(5), defines settings "
524 "that control the behavior of process accounting when disk space runs low."
528 #: build/C/man5/acct.5:178
529 msgid "B<lastcomm>(1), B<acct>(2), B<accton>(8), B<sa>(8)"
533 #: build/C/man7/capabilities.7:48
539 #: build/C/man7/capabilities.7:48 build/C/man2/getpid.2:25 build/C/man7/namespaces.7:27 build/C/man2/seteuid.2:29 build/C/man2/setgid.2:29 build/C/man2/setresuid.2:26 build/C/man2/setreuid.2:45 build/C/man2/setuid.2:30 build/C/man7/svipc.7:40 build/C/man7/user_namespaces.7:27
545 #: build/C/man7/capabilities.7:51
546 msgid "capabilities - overview of Linux capabilities"
550 #: build/C/man7/capabilities.7:63
552 "For the purpose of performing permission checks, traditional UNIX "
553 "implementations distinguish two categories of processes: I<privileged> "
554 "processes (whose effective user ID is 0, referred to as superuser or root), "
555 "and I<unprivileged> processes (whose effective UID is nonzero). Privileged "
556 "processes bypass all kernel permission checks, while unprivileged processes "
557 "are subject to full permission checking based on the process's credentials "
558 "(usually: effective UID, effective GID, and supplementary group list)."
562 #: build/C/man7/capabilities.7:70
564 "Starting with kernel 2.2, Linux divides the privileges traditionally "
565 "associated with superuser into distinct units, known as I<capabilities>, "
566 "which can be independently enabled and disabled. Capabilities are a "
567 "per-thread attribute."
571 #: build/C/man7/capabilities.7:70
573 msgid "Capabilities list"
577 #: build/C/man7/capabilities.7:73
579 "The following list shows the capabilities implemented on Linux, and the "
580 "operations or behaviors that each capability permits:"
584 #: build/C/man7/capabilities.7:73
586 msgid "B<CAP_AUDIT_CONTROL> (since Linux 2.6.11)"
590 #: build/C/man7/capabilities.7:77
592 "Enable and disable kernel auditing; change auditing filter rules; retrieve "
593 "auditing status and filtering rules."
597 #: build/C/man7/capabilities.7:77
599 msgid "B<CAP_AUDIT_READ> (since Linux 3.16)"
602 #. commit a29b694aa1739f9d76538e34ae25524f9c549d59
603 #. commit 3a101b8de0d39403b2c7e5c23fd0b005668acf48
605 #: build/C/man7/capabilities.7:82
606 msgid "Allow reading the audit log via a multicast netlink socket."
610 #: build/C/man7/capabilities.7:82
612 msgid "B<CAP_AUDIT_WRITE> (since Linux 2.6.11)"
616 #: build/C/man7/capabilities.7:85
617 msgid "Write records to kernel auditing log."
621 #: build/C/man7/capabilities.7:85
623 msgid "B<CAP_BLOCK_SUSPEND> (since Linux 3.5)"
627 #: build/C/man7/capabilities.7:91
629 "Employ features that can block system suspend (B<epoll>(7) B<EPOLLWAKEUP>, "
630 "I</proc/sys/wake_lock>)."
634 #: build/C/man7/capabilities.7:91
640 #: build/C/man7/capabilities.7:95
641 msgid "Make arbitrary changes to file UIDs and GIDs (see B<chown>(2))."
645 #: build/C/man7/capabilities.7:95
647 msgid "B<CAP_DAC_OVERRIDE>"
651 #: build/C/man7/capabilities.7:99
653 "Bypass file read, write, and execute permission checks. (DAC is an "
654 "abbreviation of \"discretionary access control\".)"
658 #: build/C/man7/capabilities.7:99
660 msgid "B<CAP_DAC_READ_SEARCH>"
664 #: build/C/man7/capabilities.7:103 build/C/man7/capabilities.7:106 build/C/man7/capabilities.7:116 build/C/man7/capabilities.7:126 build/C/man7/capabilities.7:130 build/C/man7/capabilities.7:132 build/C/man7/capabilities.7:134 build/C/man7/capabilities.7:204 build/C/man7/capabilities.7:206 build/C/man7/capabilities.7:208 build/C/man7/capabilities.7:210 build/C/man7/capabilities.7:212 build/C/man7/capabilities.7:214 build/C/man7/capabilities.7:216 build/C/man7/capabilities.7:218 build/C/man7/capabilities.7:220 build/C/man7/capabilities.7:244 build/C/man7/capabilities.7:246 build/C/man7/capabilities.7:296 build/C/man7/capabilities.7:306 build/C/man7/capabilities.7:312 build/C/man7/capabilities.7:317 build/C/man7/capabilities.7:323 build/C/man7/capabilities.7:327 build/C/man7/capabilities.7:334 build/C/man7/capabilities.7:337 build/C/man7/capabilities.7:345 build/C/man7/capabilities.7:347 build/C/man7/capabilities.7:356 build/C/man7/capabilities.7:365 build/C/man7/capabilities.7:368 build/C/man7/capabilities.7:372 build/C/man7/capabilities.7:380 build/C/man7/capabilities.7:383 build/C/man7/capabilities.7:390 build/C/man7/capabilities.7:395 build/C/man7/capabilities.7:401 build/C/man7/capabilities.7:405 build/C/man7/capabilities.7:409 build/C/man7/capabilities.7:413 build/C/man7/capabilities.7:417 build/C/man7/capabilities.7:444 build/C/man7/capabilities.7:449 build/C/man7/capabilities.7:455 build/C/man7/capabilities.7:458 build/C/man7/capabilities.7:461 build/C/man7/capabilities.7:471 build/C/man7/capabilities.7:475 build/C/man7/capabilities.7:492 build/C/man7/capabilities.7:495 build/C/man7/capabilities.7:499 build/C/man7/capabilities.7:504 build/C/man7/capabilities.7:513 build/C/man7/capabilities.7:518 build/C/man7/capabilities.7:521 build/C/man7/capabilities.7:526 build/C/man7/capabilities.7:529 build/C/man7/capabilities.7:532 build/C/man7/capabilities.7:535 build/C/man7/capabilities.7:538 build/C/man7/capabilities.7:543 build/C/man7/capabilities.7:545 
build/C/man7/capabilities.7:551 build/C/man7/capabilities.7:559 build/C/man7/capabilities.7:561 build/C/man7/capabilities.7:565 build/C/man7/capabilities.7:567 build/C/man7/capabilities.7:570 build/C/man7/capabilities.7:574 build/C/man7/capabilities.7:576 build/C/man7/capabilities.7:578 build/C/man7/capabilities.7:580 build/C/man7/capabilities.7:589 build/C/man7/capabilities.7:596 build/C/man7/capabilities.7:601 build/C/man7/capabilities.7:606 build/C/man7/capabilities.7:611 build/C/man7/capabilities.7:636 build/C/man7/capabilities.7:643 build/C/man7/capabilities.7:844 build/C/man7/capabilities.7:852 build/C/man7/capabilities.7:1172 build/C/man7/capabilities.7:1177 build/C/man7/cpuset.7:540 build/C/man7/cpuset.7:545 build/C/man7/cpuset.7:550 build/C/man7/cpuset.7:726 build/C/man7/cpuset.7:730 build/C/man7/cpuset.7:927 build/C/man7/cpuset.7:930 build/C/man7/cpuset.7:934 build/C/man7/cpuset.7:938 build/C/man7/cpuset.7:942 build/C/man7/credentials.7:177 build/C/man7/credentials.7:183 build/C/man7/credentials.7:195 build/C/man7/credentials.7:217 build/C/man7/credentials.7:234 build/C/man7/credentials.7:266 build/C/man7/credentials.7:269 build/C/man7/credentials.7:280 build/C/man7/credentials.7:283 build/C/man2/getrlimit.2:690 build/C/man2/getrlimit.2:693 build/C/man7/namespaces.7:212 build/C/man7/namespaces.7:215 build/C/man7/namespaces.7:228 build/C/man7/pid_namespaces.7:233 build/C/man7/pid_namespaces.7:241 build/C/man7/pid_namespaces.7:252 build/C/man7/user_namespaces.7:261 build/C/man7/user_namespaces.7:266 build/C/man7/user_namespaces.7:272 build/C/man7/user_namespaces.7:285 build/C/man7/user_namespaces.7:306 build/C/man7/user_namespaces.7:474 build/C/man7/user_namespaces.7:477 build/C/man7/user_namespaces.7:479 build/C/man7/user_namespaces.7:492 build/C/man7/user_namespaces.7:505 build/C/man7/user_namespaces.7:532 build/C/man7/user_namespaces.7:541 build/C/man2/seccomp.2:265 build/C/man2/seccomp.2:269 build/C/man2/seccomp.2:272 build/C/man2/seccomp.2:277 
build/C/man2/seccomp.2:281 build/C/man2/seccomp.2:455 build/C/man2/seccomp.2:463 build/C/man2/seccomp.2:469
670 #: build/C/man7/capabilities.7:106
672 "Bypass file read permission checks and directory read and execute permission "
677 #: build/C/man7/capabilities.7:109
678 msgid "Invoke B<open_by_handle_at>(2)."
682 #: build/C/man7/capabilities.7:112
684 msgid "B<CAP_FOWNER>"
688 #: build/C/man7/capabilities.7:126
690 "Bypass permission checks on operations that normally require the filesystem "
691 "UID of the process to match the UID of the file (e.g., B<chmod>(2), "
692 "B<utime>(2)), excluding those operations covered by B<CAP_DAC_OVERRIDE> and "
693 "B<CAP_DAC_READ_SEARCH>;"
697 #: build/C/man7/capabilities.7:130
698 msgid "set extended file attributes (see B<chattr>(1)) on arbitrary files;"
702 #: build/C/man7/capabilities.7:132
703 msgid "set Access Control Lists (ACLs) on arbitrary files;"
707 #: build/C/man7/capabilities.7:134
708 msgid "ignore directory sticky bit on file deletion;"
712 #: build/C/man7/capabilities.7:141
713 msgid "specify B<O_NOATIME> for arbitrary files in B<open>(2) and B<fcntl>(2)."
717 #: build/C/man7/capabilities.7:143
719 msgid "B<CAP_FSETID>"
723 #: build/C/man7/capabilities.7:149
725 "Don't clear set-user-ID and set-group-ID permission bits when a file is "
726 "modified; set the set-group-ID bit for a file whose GID does not match the "
727 "filesystem or any of the supplementary GIDs of the calling process."
731 #: build/C/man7/capabilities.7:149
733 msgid "B<CAP_IPC_LOCK>"
736 #. FIXME . As at Linux 3.2, there are some strange uses of this capability
737 #. in other places; they probably should be replaced with something else.
739 #: build/C/man7/capabilities.7:158
740 msgid "Lock memory (B<mlock>(2), B<mlockall>(2), B<mmap>(2), B<shmctl>(2))."
744 #: build/C/man7/capabilities.7:158
746 msgid "B<CAP_IPC_OWNER>"
750 #: build/C/man7/capabilities.7:161
751 msgid "Bypass permission checks for operations on System V IPC objects."
755 #: build/C/man7/capabilities.7:161
760 #. FIXME . CAP_KILL also has an effect for threads + setting child
761 #. termination signal to other than SIGCHLD: without this
762 #. capability, the termination signal reverts to SIGCHLD
763 #. if the child does an exec(). What is the rationale
766 #: build/C/man7/capabilities.7:174
768 "Bypass permission checks for sending signals (see B<kill>(2)). This "
769 "includes use of the B<ioctl>(2) B<KDSIGACCEPT> operation."
773 #: build/C/man7/capabilities.7:174
775 msgid "B<CAP_LEASE> (since Linux 2.4)"
779 #: build/C/man7/capabilities.7:178
780 msgid "Establish leases on arbitrary files (see B<fcntl>(2))."
784 #: build/C/man7/capabilities.7:178
786 msgid "B<CAP_LINUX_IMMUTABLE>"
789 #. These attributes are now available on ext2, ext3, Reiserfs, XFS, JFS
791 #: build/C/man7/capabilities.7:187
793 "Set the B<FS_APPEND_FL> and B<FS_IMMUTABLE_FL> inode flags (see "
798 #: build/C/man7/capabilities.7:187
800 msgid "B<CAP_MAC_ADMIN> (since Linux 2.6.25)"
804 #: build/C/man7/capabilities.7:191
806 "Allow MAC configuration or state changes. Implemented for the Smack Linux "
807 "Security Module (LSM)."
811 #: build/C/man7/capabilities.7:191
813 msgid "B<CAP_MAC_OVERRIDE> (since Linux 2.6.25)"
817 #: build/C/man7/capabilities.7:195
818 msgid "Override Mandatory Access Control (MAC). Implemented for the Smack LSM."
822 #: build/C/man7/capabilities.7:195
824 msgid "B<CAP_MKNOD> (since Linux 2.4)"
828 #: build/C/man7/capabilities.7:199
829 msgid "Create special files using B<mknod>(2)."
833 #: build/C/man7/capabilities.7:199
835 msgid "B<CAP_NET_ADMIN>"
839 #: build/C/man7/capabilities.7:202
840 msgid "Perform various network-related operations:"
844 #: build/C/man7/capabilities.7:206
845 msgid "interface configuration;"
849 #: build/C/man7/capabilities.7:208
850 msgid "administration of IP firewall, masquerading, and accounting;"
854 #: build/C/man7/capabilities.7:210
855 msgid "modify routing tables;"
859 #: build/C/man7/capabilities.7:212
860 msgid "bind to any address for transparent proxying;"
864 #: build/C/man7/capabilities.7:214
865 msgid "set type-of-service (TOS);"
869 #: build/C/man7/capabilities.7:216
870 msgid "clear driver statistics;"
874 #: build/C/man7/capabilities.7:218
875 msgid "set promiscuous mode;"
879 #: build/C/man7/capabilities.7:220
880 msgid "enabling multicasting;"
884 #: build/C/man7/capabilities.7:231
886 "use B<setsockopt>(2) to set the following socket options: B<SO_DEBUG>, "
887 "B<SO_MARK>, B<SO_PRIORITY> (for a priority outside the range 0 to 6), "
888 "B<SO_RCVBUFFORCE>, and B<SO_SNDBUFFORCE>."
892 #: build/C/man7/capabilities.7:233
894 msgid "B<CAP_NET_BIND_SERVICE>"
898 #: build/C/man7/capabilities.7:237
900 "Bind a socket to Internet domain privileged ports (port numbers less than "
905 #: build/C/man7/capabilities.7:237
907 msgid "B<CAP_NET_BROADCAST>"
911 #: build/C/man7/capabilities.7:240
912 msgid "(Unused) Make socket broadcasts, and listen to multicasts."
916 #: build/C/man7/capabilities.7:240
918 msgid "B<CAP_NET_RAW>"
922 #: build/C/man7/capabilities.7:246
923 msgid "use RAW and PACKET sockets;"
927 #: build/C/man7/capabilities.7:248
928 msgid "bind to any address for transparent proxying."
932 #: build/C/man7/capabilities.7:251
934 msgid "B<CAP_SETGID>"
938 #: build/C/man7/capabilities.7:257
940 "Make arbitrary manipulations of process GIDs and supplementary GID list; "
941 "forge GID when passing socket credentials via UNIX domain sockets; write a "
942 "group ID mapping in a user namespace (see B<user_namespaces>(7))."
946 #: build/C/man7/capabilities.7:257
948 msgid "B<CAP_SETFCAP> (since Linux 2.6.24)"
952 #: build/C/man7/capabilities.7:260
953 msgid "Set file capabilities."
957 #: build/C/man7/capabilities.7:260
959 msgid "B<CAP_SETPCAP>"
963 #: build/C/man7/capabilities.7:271
965 "If file capabilities are not supported: grant or remove any capability in "
966 "the caller's permitted capability set to or from any other process. (This "
967 "property of B<CAP_SETPCAP> is not available when the kernel is configured to "
968 "support file capabilities, since B<CAP_SETPCAP> has entirely different "
969 "semantics for such kernels.)"
973 #: build/C/man7/capabilities.7:281
975 "If file capabilities are supported: add any capability from the calling "
976 "thread's bounding set to its inheritable set; drop capabilities from the "
977 "bounding set (via B<prctl>(2) B<PR_CAPBSET_DROP>); make changes to the "
978 "I<securebits> flags."
982 #: build/C/man7/capabilities.7:281
984 msgid "B<CAP_SETUID>"
987 #. FIXME CAP_SETUID also an effect in exec(); document this.
989 #: build/C/man7/capabilities.7:292
991 "Make arbitrary manipulations of process UIDs (B<setuid>(2), B<setreuid>(2), "
992 "B<setresuid>(2), B<setfsuid>(2)); forge UID when passing socket credentials "
993 "via UNIX domain sockets; write a user ID mapping in a user namespace (see "
994 "B<user_namespaces>(7))."
998 #: build/C/man7/capabilities.7:292
1000 msgid "B<CAP_SYS_ADMIN>"
1004 #: build/C/man7/capabilities.7:306
1006 "Perform a range of system administration operations including: "
1007 "B<quotactl>(2), B<mount>(2), B<umount>(2), B<swapon>(2), B<swapoff>(2), "
1008 "B<sethostname>(2), and B<setdomainname>(2);"
1012 #: build/C/man7/capabilities.7:312
1014 "perform privileged B<syslog>(2) operations (since Linux 2.6.37, "
1015 "B<CAP_SYSLOG> should be used to permit such operations);"
1019 #: build/C/man7/capabilities.7:317
1020 msgid "perform B<VM86_REQUEST_IRQ> B<vm86>(2) command;"
1024 #: build/C/man7/capabilities.7:323
1026 "perform B<IPC_SET> and B<IPC_RMID> operations on arbitrary System V IPC "
1031 #: build/C/man7/capabilities.7:327 build/C/man7/capabilities.7:574
1032 msgid "override B<RLIMIT_NPROC> resource limit;"
1036 #: build/C/man7/capabilities.7:334
1038 "perform operations on I<trusted> and I<security> Extended Attributes (see "
1043 #: build/C/man7/capabilities.7:337
1044 msgid "use B<lookup_dcookie>(2);"
1048 #: build/C/man7/capabilities.7:345
1050 "use B<ioprio_set>(2) to assign B<IOPRIO_CLASS_RT> and (before Linux 2.6.25) "
1051 "B<IOPRIO_CLASS_IDLE> I/O scheduling classes;"
1055 #: build/C/man7/capabilities.7:347
1056 msgid "forge PID when passing socket credentials via UNIX domain sockets;"
1060 #: build/C/man7/capabilities.7:356
1062 "exceed I</proc/sys/fs/file-max>, the system-wide limit on the number of open "
1063 "files, in system calls that open files (e.g., B<accept>(2), B<execve>(2), "
1064 "B<open>(2), B<pipe>(2));"
1068 #: build/C/man7/capabilities.7:365
1070 "employ B<CLONE_*> flags that create new namespaces with B<clone>(2) and "
1071 "B<unshare>(2) (but, since Linux 3.8, creating user namespaces does not "
1072 "require any capability);"
1076 #: build/C/man7/capabilities.7:368
1077 msgid "call B<perf_event_open>(2);"
1081 #: build/C/man7/capabilities.7:372
1082 msgid "access privileged I<perf> event information;"
1086 #: build/C/man7/capabilities.7:380
1087 msgid "call B<setns>(2) (requires B<CAP_SYS_ADMIN> in the I<target> namespace);"
1091 #: build/C/man7/capabilities.7:383
1092 msgid "call B<fanotify_init>(2);"
1096 #: build/C/man7/capabilities.7:390
1097 msgid "perform B<KEYCTL_CHOWN> and B<KEYCTL_SETPERM> B<keyctl>(2) operations;"
1101 #: build/C/man7/capabilities.7:395
1102 msgid "perform B<madvise>(2) B<MADV_HWPOISON> operation;"
1106 #: build/C/man7/capabilities.7:401
1108 "employ the B<TIOCSTI> B<ioctl>(2) to insert characters into the input queue "
1109 "of a terminal other than the caller's controlling terminal;"
1113 #: build/C/man7/capabilities.7:405
1114 msgid "employ the obsolete B<nfsservctl>(2) system call;"
1118 #: build/C/man7/capabilities.7:409
1119 msgid "employ the obsolete B<bdflush>(2) system call;"
1123 #: build/C/man7/capabilities.7:413
1124 msgid "perform various privileged block-device B<ioctl>(2) operations;"
1128 #: build/C/man7/capabilities.7:417
1129 msgid "perform various privileged filesystem B<ioctl>(2) operations;"
1133 #: build/C/man7/capabilities.7:419
1134 msgid "perform administrative operations on many device drivers."
1138 #: build/C/man7/capabilities.7:421
1140 msgid "B<CAP_SYS_BOOT>"
1144 #: build/C/man7/capabilities.7:427
1145 msgid "Use B<reboot>(2) and B<kexec_load>(2)."
1149 #: build/C/man7/capabilities.7:427
1151 msgid "B<CAP_SYS_CHROOT>"
1155 #: build/C/man7/capabilities.7:431
1156 msgid "Use B<chroot>(2)."
1160 #: build/C/man7/capabilities.7:431
1162 msgid "B<CAP_SYS_MODULE>"
1166 #: build/C/man7/capabilities.7:440
1168 "Load and unload kernel modules (see B<init_module>(2) and "
1169 "B<delete_module>(2)); in kernels before 2.6.25: drop capabilities from the "
1170 "system-wide capability bounding set."
1174 #: build/C/man7/capabilities.7:440
1176 msgid "B<CAP_SYS_NICE>"
1180 #: build/C/man7/capabilities.7:449
1182 "Raise process nice value (B<nice>(2), B<setpriority>(2)) and change the "
1183 "nice value for arbitrary processes;"
1187 #: build/C/man7/capabilities.7:455
1189 "set real-time scheduling policies for calling process, and set scheduling "
1190 "policies and priorities for arbitrary processes (B<sched_setscheduler>(2), "
1191 "B<sched_setparam>(2), B<sched_setattr>(2));"
1195 #: build/C/man7/capabilities.7:458
1196 msgid "set CPU affinity for arbitrary processes (B<sched_setaffinity>(2));"
1200 #: build/C/man7/capabilities.7:461
1202 "set I/O scheduling class and priority for arbitrary processes "
1203 "(B<ioprio_set>(2));"
1206 #. FIXME CAP_SYS_NICE also has the following effect for
1207 #. migrate_pages(2):
1208 #. do_migrate_pages(mm, &old, &new,
1209 #. capable(CAP_SYS_NICE) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE);
1212 #: build/C/man7/capabilities.7:471
1214 "apply B<migrate_pages>(2) to arbitrary processes and allow processes to be "
1215 "migrated to arbitrary nodes;"
1219 #: build/C/man7/capabilities.7:475
1220 msgid "apply B<move_pages>(2) to arbitrary processes;"
1224 #: build/C/man7/capabilities.7:482
1225 msgid "use the B<MPOL_MF_MOVE_ALL> flag with B<mbind>(2) and B<move_pages>(2)."
1229 #: build/C/man7/capabilities.7:484
1231 msgid "B<CAP_SYS_PACCT>"
1235 #: build/C/man7/capabilities.7:488
1236 msgid "Use B<acct>(2)."
1240 #: build/C/man7/capabilities.7:488
1242 msgid "B<CAP_SYS_PTRACE>"
1246 #: build/C/man7/capabilities.7:495
1247 msgid "Trace arbitrary processes using B<ptrace>(2);"
1251 #: build/C/man7/capabilities.7:499
1252 msgid "apply B<get_robust_list>(2) to arbitrary processes;"
1256 #: build/C/man7/capabilities.7:504
1258 "transfer data to or from the memory of arbitrary processes using "
1259 "B<process_vm_readv>(2) and B<process_vm_writev>(2);"
1263 #: build/C/man7/capabilities.7:507
1264 msgid "inspect processes using B<kcmp>(2)."
1268 #: build/C/man7/capabilities.7:509
1270 msgid "B<CAP_SYS_RAWIO>"
1274 #: build/C/man7/capabilities.7:518
1275 msgid "Perform I/O port operations (B<iopl>(2) and B<ioperm>(2));"
1279 #: build/C/man7/capabilities.7:521
1280 msgid "access I</proc/kcore>;"
1284 #: build/C/man7/capabilities.7:526
1285 msgid "employ the B<FIBMAP> B<ioctl>(2) operation;"
1289 #: build/C/man7/capabilities.7:529
1291 "open devices for accessing x86 model-specific registers (MSRs, see "
1296 #: build/C/man7/capabilities.7:532
1297 msgid "update I</proc/sys/vm/mmap_min_addr>;"
1301 #: build/C/man7/capabilities.7:535
1303 "create memory mappings at addresses below the value specified by "
1304 "I</proc/sys/vm/mmap_min_addr>;"
1308 #: build/C/man7/capabilities.7:538
1309 msgid "map files in I</proc/bus/pci>;"
1313 #: build/C/man7/capabilities.7:543
1314 msgid "open I</dev/mem> and I</dev/kmem>;"
1318 #: build/C/man7/capabilities.7:545
1319 msgid "perform various SCSI device commands;"
1323 #: build/C/man7/capabilities.7:551
1324 msgid "perform certain operations on B<hpsa>(4) and B<cciss>(4) devices;"
1328 #: build/C/man7/capabilities.7:553
1329 msgid "perform a range of device-specific operations on other devices."
1333 #: build/C/man7/capabilities.7:555
1335 msgid "B<CAP_SYS_RESOURCE>"
1339 #: build/C/man7/capabilities.7:561
1340 msgid "Use reserved space on ext2 filesystems;"
1344 #: build/C/man7/capabilities.7:565
1345 msgid "make B<ioctl>(2) calls controlling ext3 journaling;"
1349 #: build/C/man7/capabilities.7:567
1350 msgid "override disk quota limits;"
1354 #: build/C/man7/capabilities.7:570
1355 msgid "increase resource limits (see B<setrlimit>(2));"
1359 #: build/C/man7/capabilities.7:576
1360 msgid "override maximum number of consoles on console allocation;"
1364 #: build/C/man7/capabilities.7:578
1365 msgid "override maximum number of keymaps;"
1369 #: build/C/man7/capabilities.7:580
1370 msgid "allow more than 64 Hz interrupts from the real-time clock;"
1374 #: build/C/man7/capabilities.7:589
1376 "raise I<msg_qbytes> limit for a System V message queue above the limit in "
1377 "I</proc/sys/kernel/msgmnb> (see B<msgop>(2) and B<msgctl>(2));"
1381 #: build/C/man7/capabilities.7:596
1383 "override the I</proc/sys/fs/pipe-max-size> limit when setting the capacity "
1384 "of a pipe using the B<F_SETPIPE_SZ> B<fcntl>(2) command."
1388 #: build/C/man7/capabilities.7:601
1390 "use B<F_SETPIPE_SZ> to increase the capacity of a pipe above the limit "
1391 "specified by I</proc/sys/fs/pipe-max-size>;"
1395 #: build/C/man7/capabilities.7:606
1397 "override I</proc/sys/fs/mqueue/queues_max> limit when creating POSIX message "
1398 "queues (see B<mq_overview>(7));"
1402 #: build/C/man7/capabilities.7:611
1403 msgid "employ B<prctl>(2) B<PR_SET_MM> operation;"
1407 #: build/C/man7/capabilities.7:616
1409 "set I</proc/PID/oom_score_adj> to a value lower than the value last set by a "
1410 "process with B<CAP_SYS_RESOURCE>."
1414 #: build/C/man7/capabilities.7:618
1416 msgid "B<CAP_SYS_TIME>"
1420 #: build/C/man7/capabilities.7:625
1422 "Set system clock (B<settimeofday>(2), B<stime>(2), B<adjtimex>(2)); set "
1423 "real-time (hardware) clock."
1427 #: build/C/man7/capabilities.7:625
1429 msgid "B<CAP_SYS_TTY_CONFIG>"
1433 #: build/C/man7/capabilities.7:632
1435 "Use B<vhangup>(2); employ various privileged B<ioctl>(2) operations on "
1436 "virtual terminals."
1440 #: build/C/man7/capabilities.7:632
1442 msgid "B<CAP_SYSLOG> (since Linux 2.6.37)"
1446 #: build/C/man7/capabilities.7:643
1448 "Perform privileged B<syslog>(2) operations. See B<syslog>(2) for "
1449 "information on which operations require privilege."
1453 #: build/C/man7/capabilities.7:653
1455 "View kernel addresses exposed via I</proc> and other interfaces when "
1456 "I</proc/sys/kernel/kptr_restrict> has the value 1. (See the discussion of "
1457 "I<kptr_restrict> in B<proc>(5).)"
1461 #: build/C/man7/capabilities.7:655
1463 msgid "B<CAP_WAKE_ALARM> (since Linux 3.0)"
1467 #: build/C/man7/capabilities.7:663
1469 "Trigger something that will wake up the system (set B<CLOCK_REALTIME_ALARM> "
1470 "and B<CLOCK_BOOTTIME_ALARM> timers)."
1474 #: build/C/man7/capabilities.7:663
1476 msgid "Past and current implementation"
1480 #: build/C/man7/capabilities.7:665
1481 msgid "A full implementation of capabilities requires that:"
1485 #: build/C/man7/capabilities.7:665 build/C/man7/capabilities.7:816 build/C/man7/capabilities.7:963 build/C/man7/capabilities.7:1016 build/C/man7/user_namespaces.7:173 build/C/man7/user_namespaces.7:515
1491 #: build/C/man7/capabilities.7:669
1493 "For all privileged operations, the kernel must check whether the thread has "
1494 "the required capability in its effective set."
1498 #: build/C/man7/capabilities.7:669 build/C/man7/capabilities.7:821 build/C/man7/capabilities.7:969 build/C/man7/capabilities.7:1022 build/C/man7/user_namespaces.7:189 build/C/man7/user_namespaces.7:521
1504 #: build/C/man7/capabilities.7:672
1506 "The kernel must provide system calls allowing a thread's capability sets to "
1507 "be changed and retrieved."
1511 #: build/C/man7/capabilities.7:672 build/C/man7/capabilities.7:972 build/C/man7/capabilities.7:1026 build/C/man7/user_namespaces.7:193 build/C/man7/user_namespaces.7:526
1517 #: build/C/man7/capabilities.7:675
1519 "The filesystem must support attaching capabilities to an executable file, so "
1520 "that a process gains those capabilities when the file is executed."
1524 #: build/C/man7/capabilities.7:679
1526 "Before kernel 2.6.24, only the first two of these requirements are met; "
1527 "since kernel 2.6.24, all three requirements are met."
1531 #: build/C/man7/capabilities.7:679
1533 msgid "Thread capability sets"
1537 #: build/C/man7/capabilities.7:682
1539 "Each thread has three capability sets containing zero or more of the above "
1544 #: build/C/man7/capabilities.7:682
1546 msgid "I<Permitted>:"
1550 #: build/C/man7/capabilities.7:690
1552 "This is a limiting superset for the effective capabilities that the thread "
1553 "may assume. It is also a limiting superset for the capabilities that may be "
1554 "added to the inheritable set by a thread that does not have the "
1555 "B<CAP_SETPCAP> capability in its effective set."
1559 #: build/C/man7/capabilities.7:696
1561 "If a thread drops a capability from its permitted set, it can never "
1562 "reacquire that capability (unless it B<execve>(2)s either a set-user-ID-root "
1563 "program, or a program whose associated file capabilities grant that "
1568 #: build/C/man7/capabilities.7:696
1570 msgid "I<Inheritable>:"
1574 #: build/C/man7/capabilities.7:703
1576 "This is a set of capabilities preserved across an B<execve>(2). It provides "
1577 "a mechanism for a process to assign capabilities to the permitted set of the "
1578 "new program during an B<execve>(2)."
1582 #: build/C/man7/capabilities.7:703 build/C/man7/capabilities.7:753
1584 msgid "I<Effective>:"
1588 #: build/C/man7/capabilities.7:707
1590 "This is the set of capabilities used by the kernel to perform permission "
1591 "checks for the thread."
1595 #: build/C/man7/capabilities.7:713
1597 "A child created via B<fork>(2) inherits copies of its parent's capability "
1598 "sets. See below for a discussion of the treatment of capabilities during "
1603 #: build/C/man7/capabilities.7:717
1605 "Using B<capset>(2), a thread may manipulate its own capability sets (see "
1609 #. commit 73efc0394e148d0e15583e13712637831f926720
1611 #: build/C/man7/capabilities.7:726
1613 "Since Linux 3.2, the file I</proc/sys/kernel/cap_last_cap> exposes the "
1614 "numerical value of the highest capability supported by the running kernel; "
1615 "this can be used to determine the highest bit that may be set in a "
1620 #: build/C/man7/capabilities.7:726
1622 msgid "File capabilities"
1626 #: build/C/man7/capabilities.7:741
1628 "Since kernel 2.6.24, the kernel supports associating capability sets with an "
1629 "executable file using B<setcap>(8). The file capability sets are stored in "
1630 "an extended attribute (see B<setxattr>(2)) named I<security.capability>. "
1631 "Writing to this extended attribute requires the B<CAP_SETFCAP> capability. "
1632 "The file capability sets, in conjunction with the capability sets of the "
1633 "thread, determine the capabilities of a thread after an B<execve>(2)."
1637 #: build/C/man7/capabilities.7:743
1638 msgid "The three file capability sets are:"
1642 #: build/C/man7/capabilities.7:743
1644 msgid "I<Permitted> (formerly known as I<forced>):"
1648 #: build/C/man7/capabilities.7:747
1650 "These capabilities are automatically permitted to the thread, regardless of "
1651 "the thread's inheritable capabilities."
1655 #: build/C/man7/capabilities.7:747
1657 msgid "I<Inheritable> (formerly known as I<allowed>):"
1661 #: build/C/man7/capabilities.7:753
1663 "This set is ANDed with the thread's inheritable set to determine which "
1664 "inheritable capabilities are enabled in the permitted set of the thread "
1665 "after the B<execve>(2)."
1669 #: build/C/man7/capabilities.7:763
1671 "This is not a set, but rather just a single bit. If this bit is set, then "
1672 "during an B<execve>(2) all of the new permitted capabilities for the thread "
1673 "are also raised in the effective set. If this bit is not set, then after an "
1674 "B<execve>(2), none of the new permitted capabilities is in the new effective "
1679 #: build/C/man7/capabilities.7:779
1681 "Enabling the file effective capability bit implies that any file permitted "
1682 "or inheritable capability that causes a thread to acquire the corresponding "
1683 "permitted capability during an B<execve>(2) (see the transformation rules "
1684 "described below) will also acquire that capability in its effective set. "
1685 "Therefore, when assigning capabilities to a file (B<setcap>(8), "
1686 "B<cap_set_file>(3), B<cap_set_fd>(3)), if we specify the effective flag as "
1687 "being enabled for any capability, then the effective flag must also be "
1688 "specified as enabled for all other capabilities for which the corresponding "
1689 "permitted or inheritable flag is enabled."
1693 #: build/C/man7/capabilities.7:779
1695 msgid "Transformation of capabilities during execve()"
1699 #: build/C/man7/capabilities.7:785
1701 "During an B<execve>(2), the kernel calculates the new capabilities of the "
1702 "process using the following algorithm:"
1706 #: build/C/man7/capabilities.7:790
1709 "P'(permitted) = (P(inheritable) & F(inheritable)) |\n"
1710 " (F(permitted) & cap_bset)\n"
1714 #: build/C/man7/capabilities.7:792
1716 msgid "P'(effective) = F(effective) ? P'(permitted) : 0\n"
1720 #: build/C/man7/capabilities.7:794
1722 msgid "P'(inheritable) = P(inheritable) [i.e., unchanged]\n"
1726 #: build/C/man7/capabilities.7:798
1731 #: build/C/man7/capabilities.7:799
1737 #: build/C/man7/capabilities.7:802
1738 msgid "denotes the value of a thread capability set before the B<execve>(2)"
1742 #: build/C/man7/capabilities.7:802
1748 #: build/C/man7/capabilities.7:805
1749 msgid "denotes the value of a capability set after the B<execve>(2)"
1753 #: build/C/man7/capabilities.7:805
1759 #: build/C/man7/capabilities.7:807
1760 msgid "denotes a file capability set"
1764 #: build/C/man7/capabilities.7:807
1770 #: build/C/man7/capabilities.7:809
1771 msgid "is the value of the capability bounding set (described below)."
1775 #: build/C/man7/capabilities.7:811
1777 msgid "Capabilities and execution of programs by root"
1781 #: build/C/man7/capabilities.7:816
1783 "In order to provide an all-powerful I<root> using capability sets, during an "
1788 #: build/C/man7/capabilities.7:821
1790 "If a set-user-ID-root program is being executed, or the real user ID of the "
1791 "process is 0 (root) then the file inheritable and permitted sets are "
1792 "defined to be all ones (i.e., all capabilities enabled)."
1796 #: build/C/man7/capabilities.7:824
1798 "If a set-user-ID-root program is being executed, then the file effective bit "
1799 "is defined to be one (enabled)."
1802 #. If a process with real UID 0, and nonzero effective UID does an
1803 #. exec(), then it gets all capabilities in its
1804 #. permitted set, and no effective capabilities
1806 #: build/C/man7/capabilities.7:839
1808 "The upshot of the above rules, combined with the capabilities "
1809 "transformations described above, is that when a process B<execve>(2)s a "
1810 "set-user-ID-root program, or when a process with an effective UID of 0 "
1811 "B<execve>(2)s a program, it gains all capabilities in its permitted and "
1812 "effective capability sets, except those masked out by the capability "
1813 "bounding set. This provides semantics that are the same as those provided "
1814 "by traditional UNIX systems."
1818 #: build/C/man7/capabilities.7:839
1820 msgid "Capability bounding set"
1824 #: build/C/man7/capabilities.7:844
1826 "The capability bounding set is a security mechanism that can be used to "
1827 "limit the capabilities that can be gained during an B<execve>(2). The "
1828 "bounding set is used in the following ways:"
1832 #: build/C/man7/capabilities.7:852
1834 "During an B<execve>(2), the capability bounding set is ANDed with the file "
1835 "permitted capability set, and the result of this operation is assigned to "
1836 "the thread's permitted capability set. The capability bounding set thus "
1837 "places a limit on the permitted capabilities that may be granted by an "
1842 #: build/C/man7/capabilities.7:864
1844 "(Since Linux 2.6.25) The capability bounding set acts as a limiting "
1845 "superset for the capabilities that a thread can add to its inheritable set "
1846 "using B<capset>(2). This means that if a capability is not in the bounding "
1847 "set, then a thread can't add this capability to its inheritable set, even if "
1848 "it was in its permitted capabilities, and thereby cannot have this "
1849 "capability preserved in its permitted set when it B<execve>(2)s a file that "
1850 "has the capability in its inheritable set."
1854 #: build/C/man7/capabilities.7:871
1856 "Note that the bounding set masks the file permitted capabilities, but not "
1857 "the inherited capabilities. If a thread maintains a capability in its "
1858 "inherited set that is not in its bounding set, then it can still gain that "
1859 "capability in its permitted set by executing a file that has the capability "
1860 "in its inherited set."
1864 #: build/C/man7/capabilities.7:874
1866 "Depending on the kernel version, the capability bounding set is either a "
1867 "system-wide attribute, or a per-process attribute."
1871 #: build/C/man7/capabilities.7:876
1872 msgid "B<Capability bounding set prior to Linux 2.6.25>"
1876 #: build/C/man7/capabilities.7:884
1878 "In kernels before 2.6.25, the capability bounding set is a system-wide "
1879 "attribute that affects all threads on the system. The bounding set is "
1880 "accessible via the file I</proc/sys/kernel/cap-bound>. (Confusingly, this "
1881 "bit mask parameter is expressed as a signed decimal number in "
1882 "I</proc/sys/kernel/cap-bound>.)"
1886 #: build/C/man7/capabilities.7:891
1888 "Only the B<init> process may set capabilities in the capability bounding "
1889 "set; other than that, the superuser (more precisely: programs with the "
1890 "B<CAP_SYS_MODULE> capability) may only clear capabilities from this set."
1894 #: build/C/man7/capabilities.7:900
1896 "On a standard system the capability bounding set always masks out the "
1897 "B<CAP_SETPCAP> capability. To remove this restriction (dangerous!), modify "
1898 "the definition of B<CAP_INIT_EFF_SET> in I<include/linux/capability.h> and "
1899 "rebuild the kernel."
1903 #: build/C/man7/capabilities.7:904
1905 "The system-wide capability bounding set feature was added to Linux starting "
1906 "with kernel version 2.2.11."
1910 #: build/C/man7/capabilities.7:906
1911 msgid "B<Capability bounding set from Linux 2.6.25 onward>"
1915 #: build/C/man7/capabilities.7:911
1917 "From Linux 2.6.25, the I<capability bounding set> is a per-thread "
1918 "attribute. (There is no longer a system-wide capability bounding set.)"
1922 #: build/C/man7/capabilities.7:916
1924 "The bounding set is inherited at B<fork>(2) from the thread's parent, and "
1925 "is preserved across an B<execve>(2)."
1929 #: build/C/man7/capabilities.7:929
1931 "A thread may remove capabilities from its capability bounding set using the "
1932 "B<prctl>(2) B<PR_CAPBSET_DROP> operation, provided it has the "
1933 "B<CAP_SETPCAP> capability. Once a capability has been dropped from the "
1934 "bounding set, it cannot be restored to that set. A thread can determine if "
1935 "a capability is in its bounding set using the B<prctl>(2) "
1936 "B<PR_CAPBSET_READ> operation."
1940 #: build/C/man7/capabilities.7:947
1942 "Removing capabilities from the bounding set is supported only if file "
1943 "capabilities are compiled into the kernel. In kernels before Linux 2.6.33, "
1944 "file capabilities were an optional feature configurable via the "
1945 "B<CONFIG_SECURITY_FILE_CAPABILITIES> option. Since Linux 2.6.33, the "
1946 "configuration option has been removed and file capabilities are always part "
1947 "of the kernel. When file capabilities are compiled into the kernel, the "
1948 "B<init> process (the ancestor of all processes) begins with a full bounding "
1949 "set. If file capabilities are not compiled into the kernel, then B<init> "
1950 "begins with a full bounding set minus B<CAP_SETPCAP>, because this "
1951 "capability has a different meaning when there are no file capabilities."
1955 #: build/C/man7/capabilities.7:954
1957 "Removing a capability from the bounding set does not remove it from the "
1958 "thread's inherited set. However, it does prevent the capability from being "
1959 "added back into the thread's inherited set in the future."
1963 #: build/C/man7/capabilities.7:954
1965 msgid "Effect of user ID changes on capabilities"
1969 #: build/C/man7/capabilities.7:963
1971 "To preserve the traditional semantics for transitions between 0 and nonzero "
1972 "user IDs, the kernel makes the following changes to a thread's capability "
1973 "sets on changes to the thread's real, effective, saved set, and filesystem "
1974 "user IDs (using B<setuid>(2), B<setresuid>(2), or similar):"
1978 #: build/C/man7/capabilities.7:969
1980 "If one or more of the real, effective or saved set user IDs was previously "
1981 "0, and as a result of the UID changes all of these IDs have a nonzero value, "
1982 "then all capabilities are cleared from the permitted and effective "
1987 #: build/C/man7/capabilities.7:972
1989 "If the effective user ID is changed from 0 to nonzero, then all capabilities "
1990 "are cleared from the effective set."
1994 #: build/C/man7/capabilities.7:975
1996 "If the effective user ID is changed from nonzero to 0, then the permitted "
1997 "set is copied to the effective set."
2001 #: build/C/man7/capabilities.7:975 build/C/man7/capabilities.7:1030 build/C/man7/user_namespaces.7:529
2007 #: build/C/man7/capabilities.7:993
2009 "If the filesystem user ID is changed from 0 to nonzero (see B<setfsuid>(2)), "
2010 "then the following capabilities are cleared from the effective set: "
2011 "B<CAP_CHOWN>, B<CAP_DAC_OVERRIDE>, B<CAP_DAC_READ_SEARCH>, B<CAP_FOWNER>, "
2012 "B<CAP_FSETID>, B<CAP_LINUX_IMMUTABLE> (since Linux 2.6.30), "
2013 "B<CAP_MAC_OVERRIDE>, and B<CAP_MKNOD> (since Linux 2.6.30). If the "
2014 "filesystem UID is changed from nonzero to 0, then any of these capabilities "
2015 "that are enabled in the permitted set are enabled in the effective set."
2019 #: build/C/man7/capabilities.7:1001
2021 "If a thread that has a 0 value for one or more of its user IDs wants to "
2022 "prevent its permitted capability set being cleared when it resets all of its "
2023 "user IDs to nonzero values, it can do so using the B<prctl>(2) "
2024 "B<PR_SET_KEEPCAPS> operation."
2028 #: build/C/man7/capabilities.7:1001
2030 msgid "Programmatically adjusting capability sets"
2034 #: build/C/man7/capabilities.7:1016
2036 "A thread can retrieve and change its capability sets using the B<capget>(2) "
2037 "and B<capset>(2) system calls. However, the use of B<cap_get_proc>(3) and "
2038 "B<cap_set_proc>(3), both provided in the I<libcap> package, is preferred for "
2039 "this purpose. The following rules govern changes to the thread capability "
2044 #: build/C/man7/capabilities.7:1022
2046 "If the caller does not have the B<CAP_SETPCAP> capability, the new "
2047 "inheritable set must be a subset of the combination of the existing "
2048 "inheritable and permitted sets."
2052 #: build/C/man7/capabilities.7:1026
2054 "(Since Linux 2.6.25) The new inheritable set must be a subset of the "
2055 "combination of the existing inheritable set and the capability bounding set."
2059 #: build/C/man7/capabilities.7:1030
2061 "The new permitted set must be a subset of the existing permitted set (i.e., "
2062 "it is not possible to acquire permitted capabilities that the thread does "
2063 "not currently have)."
2067 #: build/C/man7/capabilities.7:1032
2068 msgid "The new effective set must be a subset of the new permitted set."
2072 #: build/C/man7/capabilities.7:1032
2074 msgid "The securebits flags: establishing a capabilities-only environment"
2077 #. For some background:
2078 #. see http://lwn.net/Articles/280279/ and
2079 #. http://article.gmane.org/gmane.linux.kernel.lsm/5476/
2081 #: build/C/man7/capabilities.7:1043
2083 "Starting with kernel 2.6.26, and with a kernel in which file capabilities "
2084 "are enabled, Linux implements a set of per-thread I<securebits> flags that "
2085 "can be used to disable special handling of capabilities for UID 0 "
2086 "(I<root>). These flags are as follows:"
2090 #: build/C/man7/capabilities.7:1043
2092 msgid "B<SECBIT_KEEP_CAPS>"
2096 #: build/C/man7/capabilities.7:1055
2098 "Setting this flag allows a thread that has one or more 0 UIDs to retain its "
2099 "capabilities when it switches all of its UIDs to a nonzero value. If this "
2100 "flag is not set, then such a UID switch causes the thread to lose all "
2101 "capabilities. This flag is always cleared on an B<execve>(2). (This flag "
2102 "provides the same functionality as the older B<prctl>(2) B<PR_SET_KEEPCAPS> "
2107 #: build/C/man7/capabilities.7:1055
2109 msgid "B<SECBIT_NO_SETUID_FIXUP>"
2113 #: build/C/man7/capabilities.7:1062
2115 "Setting this flag stops the kernel from adjusting capability sets when the "
2116 "thread's effective and filesystem UIDs are switched between zero and "
2117 "nonzero values. (See the subsection I<Effect of User ID Changes on "
2122 #: build/C/man7/capabilities.7:1062
2124 msgid "B<SECBIT_NOROOT>"
2128 #: build/C/man7/capabilities.7:1070
2130 "If this bit is set, then the kernel does not grant capabilities when a "
2131 "set-user-ID-root program is executed, or when a process with an effective or "
2132 "real UID of 0 calls B<execve>(2). (See the subsection I<Capabilities and "
2133 "execution of programs by root>.)"
2137 #: build/C/man7/capabilities.7:1080
2139 "Each of the above \"base\" flags has a companion \"locked\" flag. Setting "
2140 "any of the \"locked\" flags is irreversible, and has the effect of "
2141 "preventing further changes to the corresponding \"base\" flag. The locked "
2142 "flags are: B<SECBIT_KEEP_CAPS_LOCKED>, B<SECBIT_NO_SETUID_FIXUP_LOCKED>, and "
2143 "B<SECBIT_NOROOT_LOCKED>."
2147 #: build/C/man7/capabilities.7:1092
2149 "The I<securebits> flags can be modified and retrieved using the B<prctl>(2) "
2150 "B<PR_SET_SECUREBITS> and B<PR_GET_SECUREBITS> operations. The "
2151 "B<CAP_SETPCAP> capability is required to modify the flags."
2155 #: build/C/man7/capabilities.7:1101
2157 "The I<securebits> flags are inherited by child processes. During an "
2158 "B<execve>(2), all of the flags are preserved, except B<SECBIT_KEEP_CAPS> "
2159 "which is always cleared."
2163 #: build/C/man7/capabilities.7:1106
2165 "An application can use the following call to lock itself, and all of its "
2166 "descendants, into an environment where the only way of gaining capabilities "
2167 "is by executing a program with associated file capabilities:"
2171 #: build/C/man7/capabilities.7:1115
2174 "prctl(PR_SET_SECUREBITS,\n"
2175 " SECBIT_KEEP_CAPS_LOCKED |\n"
2176 " SECBIT_NO_SETUID_FIXUP |\n"
2177 " SECBIT_NO_SETUID_FIXUP_LOCKED |\n"
2178 " SECBIT_NOROOT |\n"
2179 " SECBIT_NOROOT_LOCKED);\n"
2183 #: build/C/man7/capabilities.7:1117
2185 msgid "Interaction with user namespaces"
2189 #: build/C/man7/capabilities.7:1120
2191 "For a discussion of the interaction of capabilities and user namespaces, see "
2192 "B<user_namespaces>(7)."
2196 #: build/C/man7/capabilities.7:1126
2198 "No standards govern capabilities, but the Linux capability implementation is "
2199 "based on the withdrawn POSIX.1e draft standard; see E<.UR "
2200 "http://wt.tuxomania.net\\:/publications\\:/posix.1e/> E<.UE .>"
2204 #: build/C/man7/capabilities.7:1131
2206 "Since kernel 2.5.27, capabilities are an optional kernel component, and can "
2207 "be enabled/disabled via the B<CONFIG_SECURITY_CAPABILITIES> kernel "
2208 "configuration option."
2211 #. 7b9a7ec565505699f503b4fcf61500dceb36e744
2213 #: build/C/man7/capabilities.7:1145
2215 "The I</proc/PID/task/TID/status> file can be used to view the capability "
2216 "sets of a thread. The I</proc/PID/status> file shows the capability sets of "
2217 "a process's main thread. Before Linux 3.8, nonexistent capabilities were "
2218 "shown as being enabled (1) in these sets. Since Linux 3.8, all nonexistent "
2219 "capabilities (above B<CAP_LAST_CAP>) are shown as disabled (0)."
2223 #: build/C/man7/capabilities.7:1160
2225 "The I<libcap> package provides a suite of routines for setting and getting "
2226 "capabilities that is more comfortable and less likely to change than the "
2227 "interface provided by B<capset>(2) and B<capget>(2). This package also "
2228 "provides the B<setcap>(8) and B<getcap>(8) programs. It can be found at"
2232 #: build/C/man7/capabilities.7:1163
2235 "http://www.kernel.org\\:/pub\\:/linux\\:/libs\\:/security\\:/linux-privs> "
2240 #: build/C/man7/capabilities.7:1172
2242 "Before kernel 2.6.24, and since kernel 2.6.24 if file capabilities are not "
2243 "enabled, a thread with the B<CAP_SETPCAP> capability can manipulate the "
2244 "capabilities of threads other than itself. However, this is only "
2245 "theoretically possible, since no thread ever has B<CAP_SETPCAP> in either of "
2250 #: build/C/man7/capabilities.7:1177
2252 "In the pre-2.6.25 implementation the system-wide capability bounding set, "
2253 "I</proc/sys/kernel/cap-bound>, always masks out this capability, and this "
2254 "cannot be changed without modifying the kernel source and rebuilding."
2258 #: build/C/man7/capabilities.7:1183
2260 "If file capabilities are disabled in the current implementation, then "
2261 "B<init> starts out with this capability removed from its per-process "
2262 "bounding set, and that bounding set is inherited by all other processes "
2263 "created on the system."
2267 #: build/C/man7/capabilities.7:1202
2269 "B<capsh>(1), B<capget>(2), B<prctl>(2), B<setfsuid>(2), B<cap_clear>(3), "
2270 "B<cap_copy_ext>(3), B<cap_from_text>(3), B<cap_get_file>(3), "
2271 "B<cap_get_proc>(3), B<cap_init>(3), B<capgetp>(3), B<capsetp>(3), "
2272 "B<libcap>(3), B<credentials>(7), B<user_namespaces>(7), B<pthreads>(7), "
2273 "B<getcap>(8), B<setcap>(8)"
2277 #: build/C/man7/capabilities.7:1204
2278 msgid "I<include/linux/capability.h> in the Linux kernel source tree"
2282 #: build/C/man2/capget.2:15
2288 #: build/C/man2/capget.2:15
2294 #: build/C/man2/capget.2:18
2295 msgid "capget, capset - set/get capabilities of thread(s)"
2299 #: build/C/man2/capget.2:20
2300 msgid "B<#include E<lt>sys/capability.hE<gt>>"
2304 #: build/C/man2/capget.2:22
2305 msgid "B<int capget(cap_user_header_t >I<hdrp>B<, cap_user_data_t >I<datap>B<);>"
2309 #: build/C/man2/capget.2:24
2311 "B<int capset(cap_user_header_t >I<hdrp>B<, const cap_user_data_t "
2316 #: build/C/man2/capget.2:35
2318 "As of Linux 2.2, the power of the superuser (root) has been partitioned into "
2319 "a set of discrete capabilities. Each thread has a set of effective "
2320 "capabilities identifying which capabilities (if any) it may currently "
2321 "exercise. Each thread also has a set of inheritable capabilities that may "
2322 "be passed through an B<execve>(2) call, and a set of permitted capabilities "
2323 "that it can make effective or inheritable."
2327 #: build/C/man2/capget.2:44
2329 "These two system calls are the raw kernel interface for getting and setting "
2330 "thread capabilities. Not only are these system calls specific to Linux, but "
2331 "the kernel API is likely to change and use of these system calls (in "
2332 "particular the format of the I<cap_user_*_t> types) is subject to extension "
2333 "with each kernel revision, but old programs will keep working."
2337 #: build/C/man2/capget.2:55
2339 "The portable interfaces are B<cap_set_proc>(3) and B<cap_get_proc>(3); if "
2340 "possible, you should use those interfaces in applications. If you wish to "
2341 "use the Linux extensions in applications, you should use the easier-to-use "
2342 "interfaces B<capsetp>(3) and B<capgetp>(3)."
2346 #: build/C/man2/capget.2:55
2348 msgid "Current details"
2352 #: build/C/man2/capget.2:58
2354 "Now that you have been warned, some current kernel details. The structures "
2355 "are defined as follows."
2359 #: build/C/man2/capget.2:63
2362 "#define _LINUX_CAPABILITY_VERSION_1 0x19980330\n"
2363 "#define _LINUX_CAPABILITY_U32S_1 1\n"
2367 #: build/C/man2/capget.2:66
2370 "#define _LINUX_CAPABILITY_VERSION_2 0x20071026\n"
2371 "#define _LINUX_CAPABILITY_U32S_2 2\n"
2375 #: build/C/man2/capget.2:71
2378 "typedef struct __user_cap_header_struct {\n"
2381 "} *cap_user_header_t;\n"
2385 #: build/C/man2/capget.2:77
2388 "typedef struct __user_cap_data_struct {\n"
2389 " __u32 effective;\n"
2390 " __u32 permitted;\n"
2391 " __u32 inheritable;\n"
2392 "} *cap_user_data_t;\n"
2396 #: build/C/man2/capget.2:96
2398 "The I<effective>, I<permitted>, and I<inheritable> fields are bit masks of "
2399 "the capabilities defined in B<capabilities>(7). Note the B<CAP_*> values "
2400 "are bit indexes and need to be bit-shifted before ORing into the bit "
2401 "fields. To define the structures for passing to the system call you have to "
2402 "use the I<struct __user_cap_header_struct> and I<struct "
2403 "__user_cap_data_struct> names because the typedefs are only pointers."
2407 #: build/C/man2/capget.2:108
2409 "Kernels prior to 2.6.25 prefer 32-bit capabilities with version "
2410 "B<_LINUX_CAPABILITY_VERSION_1>, and kernels 2.6.25+ prefer 64-bit "
2411 "capabilities with version B<_LINUX_CAPABILITY_VERSION_2>. Note, 64-bit "
2412 "capabilities use I<datap>[0] and I<datap>[1], whereas 32-bit capabilities "
2413 "use only I<datap>[0]."
2417 #: build/C/man2/capget.2:112
2419 "Another change affecting the behavior of these system calls is kernel "
2420 "support for file capabilities (VFS capability support). This support is "
2421 "currently a compile-time option (added in kernel 2.6.24)."
2425 #: build/C/man2/capget.2:119
2427 "For B<capget>() calls, one can probe the capabilities of any process by "
2428 "specifying its process ID with the I<hdrp-E<gt>pid> field value."
2432 #: build/C/man2/capget.2:119
2434 msgid "With VFS capability support"
2438 #: build/C/man2/capget.2:131
2440 "VFS Capability support creates a file-attribute method for adding "
2441 "capabilities to privileged executables. This privilege model obsoletes "
2442 "kernel support for one process asynchronously setting the capabilities of "
2443 "another. That is, with VFS support, for B<capset>() calls the only "
2444 "permitted values for I<hdrp-E<gt>pid> are 0 or B<getpid>(2), which are "
2449 #: build/C/man2/capget.2:131
2451 msgid "Without VFS capability support"
2455 #: build/C/man2/capget.2:157
2457 "When the kernel does not support VFS capabilities, B<capset>() calls can "
2458 "operate on the capabilities of the thread specified by the I<pid> field of "
2459 "I<hdrp> when that is nonzero, or on the capabilities of the calling thread "
2460 "if I<pid> is 0. If I<pid> refers to a single-threaded process, then I<pid> "
2461 "can be specified as a traditional process ID; operating on a thread of a "
2462 "multithreaded process requires a thread ID of the type returned by "
2463 "B<gettid>(2). For B<capset>(), I<pid> can also be: -1, meaning perform the "
2464 "change on all threads except the caller and B<init>(1); or a value less than "
2465 "-1, in which case the change is applied to all members of the process group "
2466 "whose ID is -I<pid>."
2470 #: build/C/man2/capget.2:160
2471 msgid "For details on the data, see B<capabilities>(7)."
2475 #: build/C/man2/capget.2:179
2477 "The calls will fail with the error B<EINVAL>, and set the I<version> field "
2478 "of I<hdrp> to the kernel preferred value of B<_LINUX_CAPABILITY_VERSION_?> "
2479 "when an unsupported I<version> value is specified. In this way, one can "
2480 "probe what the current preferred capability revision is."
2484 #: build/C/man2/capget.2:188
2486 "Bad memory address. I<hdrp> must not be NULL. I<datap> may be NULL only "
2487 "when the user is trying to determine the preferred capability version format "
2488 "supported by the kernel."
2492 #: build/C/man2/capget.2:188 build/C/man7/cpuset.7:1180 build/C/man7/cpuset.7:1189 build/C/man7/cpuset.7:1198 build/C/man7/cpuset.7:1208 build/C/man7/cpuset.7:1217 build/C/man7/cpuset.7:1224 build/C/man7/cpuset.7:1231 build/C/man2/getgroups.2:114 build/C/man2/getgroups.2:121 build/C/man2/getpriority.2:118 build/C/man2/getrlimit.2:471 build/C/man2/getrusage.2:198 build/C/man2/iopl.2:72 build/C/man2/ioprio_set.2:170 build/C/man2/seteuid.2:80 build/C/man2/setgid.2:59 build/C/man2/setpgid.2:225 build/C/man2/setresuid.2:99 build/C/man2/setreuid.2:128 build/C/man2/setuid.2:105 build/C/man2/seccomp.2:373 build/C/man2/seccomp.2:380 build/C/man2/seccomp.2:387 build/C/man2/seccomp.2:393 build/C/man2/seccomp.2:402
2498 #: build/C/man2/capget.2:191
2499 msgid "One of the arguments was invalid."
2503 #: build/C/man2/capget.2:196
2505 "An attempt was made to add a capability to the Permitted set, or to set a "
2506 "capability in the Effective or Inheritable sets that is not in the Permitted "
2511 #: build/C/man2/capget.2:215
2513 "The caller attempted to use B<capset>() to modify the capabilities of a "
2514 "thread other than itself, but lacked sufficient privilege. For kernels "
2515 "supporting VFS capabilities, this is never permitted. For kernels lacking "
2516 "VFS support, the B<CAP_SETPCAP> capability is required. (A bug in kernels "
2517 "before 2.6.11 meant that this error could also occur if a thread without "
2518 "this capability tried to change its own capabilities by specifying the "
2519 "I<pid> field as a nonzero value (i.e., the value returned by B<getpid>(2)) "
2524 #: build/C/man2/capget.2:215 build/C/man7/cpuset.7:1330 build/C/man2/getpriority.2:126 build/C/man2/getrlimit.2:502 build/C/man2/getsid.2:70 build/C/man2/ioprio_set.2:187 build/C/man2/setpgid.2:240 build/C/man2/seccomp.2:426
2530 #: build/C/man2/capget.2:218
2531 msgid "No such thread."
2535 #: build/C/man2/capget.2:220 build/C/man2/ioprio_set.2:198
2536 msgid "These system calls are Linux-specific."
2540 #: build/C/man2/capget.2:225
2542 "The portable interface to the capability querying and setting functions is "
2543 "provided by the I<libcap> library and is available here:"
2547 #: build/C/man2/capget.2:228
2550 "http://git.kernel.org/cgit\\:/linux\\:/kernel\\:/git\\:/morgan\\:\\:/libcap.git> "
2555 #: build/C/man2/capget.2:231
2556 msgid "B<clone>(2), B<gettid>(2), B<capabilities>(7)"
2560 #: build/C/man7/cpuset.7:25
2566 #: build/C/man7/cpuset.7:25
2572 #: build/C/man7/cpuset.7:28
2573 msgid "cpuset - confine processes to processor and memory node subsets"
2577 #: build/C/man7/cpuset.7:35
2579 "The cpuset filesystem is a pseudo-filesystem interface to the kernel cpuset "
2580 "mechanism, which is used to control the processor placement and memory "
2581 "placement of processes. It is commonly mounted at I</dev/cpuset>."
2585 #: build/C/man7/cpuset.7:52
2587 "On systems with kernels compiled with built-in support for cpusets, all "
2588 "processes are attached to a cpuset, and cpusets are always present. If a "
2589 "system supports cpusets, then it will have the entry B<nodev cpuset> in the "
2590 "file I</proc/filesystems>. By mounting the cpuset filesystem (see the "
2591 "B<EXAMPLE> section below), the administrator can configure the cpusets on a "
2592 "system to control the processor and memory placement of processes on that "
2593 "system. By default, if the cpuset configuration on a system is not modified "
2594 "or if the cpuset filesystem is not even mounted, then the cpuset mechanism, "
2595 "though present, has no effect on the system's behavior."
2599 #: build/C/man7/cpuset.7:54
2600 msgid "A cpuset defines a list of CPUs and memory nodes."
2604 #: build/C/man7/cpuset.7:63
2606 "The CPUs of a system include all the logical processing units on which a "
2607 "process can execute, including, if present, multiple processor cores within "
2608 "a package and Hyper-Threads within a processor core. Memory nodes include "
2609 "all distinct banks of main memory; small and SMP systems typically have just "
2610 "one memory node that contains all the system's main memory, while NUMA "
2611 "(non-uniform memory access) systems have multiple memory nodes."
2615 #: build/C/man7/cpuset.7:73
2617 "Cpusets are represented as directories in a hierarchical pseudo-filesystem, "
2618 "where the top directory in the hierarchy (I</dev/cpuset>) represents the "
2619 "entire system (all online CPUs and memory nodes) and any cpuset that is the "
2620 "child (descendant) of another parent cpuset contains a subset of that "
2621 "parent's CPUs and memory nodes. The directories and files representing "
2622 "cpusets have normal filesystem permissions."
2626 #: build/C/man7/cpuset.7:84
2628 "Every process in the system belongs to exactly one cpuset. A process is "
2629 "confined to run only on the CPUs in the cpuset it belongs to, and to "
2630 "allocate memory only on the memory nodes in that cpuset. When a process "
2631 "B<fork>(2)s, the child process is placed in the same cpuset as its parent. "
2632 "With sufficient privilege, a process may be moved from one cpuset to another "
2633 "and the allowed CPUs and memory nodes of an existing cpuset may be changed."
2637 #: build/C/man7/cpuset.7:92
2639 "When the system begins booting, a single cpuset is defined that includes all "
2640 "CPUs and memory nodes on the system, and all processes are in that cpuset. "
2641 "During the boot process, or later during normal system operation, other "
2642 "cpusets may be created, as subdirectories of this top cpuset, under the "
2643 "control of the system administrator, and processes may be placed in these "
2648 #: build/C/man7/cpuset.7:114
2650 "Cpusets are integrated with the B<sched_setaffinity>(2) scheduling affinity "
2651 "mechanism and the B<mbind>(2) and B<set_mempolicy>(2) memory-placement "
2652 "mechanisms in the kernel. Neither of these mechanisms lets a process make "
2653 "use of a CPU or memory node that is not allowed by that process's cpuset. "
2654 "If changes to a process's cpuset placement conflict with these other "
2655 "mechanisms, then cpuset placement is enforced even if it means overriding "
2656 "these other mechanisms. The kernel accomplishes this overriding by silently "
2657 "restricting the CPUs and memory nodes requested by these other mechanisms to "
2658 "those allowed by the invoking process's cpuset. This can result in these "
2659 "other calls returning an error, if for example, such a call ends up "
2660 "requesting an empty set of CPUs or memory nodes, after that request is "
2661 "restricted to the invoking process's cpuset."
2665 #: build/C/man7/cpuset.7:120
2667 "Typically, a cpuset is used to manage the CPU and memory-node confinement "
2668 "for a set of cooperating processes such as a batch scheduler job, and these "
2669 "other mechanisms are used to manage the placement of individual processes or "
2670 "memory regions within that set or job."
2674 #: build/C/man7/cpuset.7:120
2680 #: build/C/man7/cpuset.7:125
2682 "Each directory below I</dev/cpuset> represents a cpuset and contains a fixed "
2683 "set of pseudo-files describing the state of that cpuset."
2687 #: build/C/man7/cpuset.7:135
2689 "New cpusets are created using the B<mkdir>(2) system call or the "
2690 "B<mkdir>(1) command. The properties of a cpuset, such as its flags, "
2691 "allowed CPUs and memory nodes, and attached processes, are queried and "
2692 "modified by reading or writing to the appropriate file in that cpuset's "
2693 "directory, as listed below."
2697 #: build/C/man7/cpuset.7:141
2699 "The pseudo-files in each cpuset directory are automatically created when the "
2700 "cpuset is created, as a result of the B<mkdir>(2) invocation. It is not "
2701 "possible to directly add or remove these pseudo-files."
2705 #: build/C/man7/cpuset.7:149
2707 "A cpuset directory that contains no child cpuset directories, and has no "
2708 "attached processes, can be removed using B<rmdir>(2) or B<rmdir>(1). It is "
2709 "not necessary, or possible, to remove the pseudo-files inside the directory "
2710 "before removing it."
2714 #: build/C/man7/cpuset.7:163
2716 "The pseudo-files in each cpuset directory are small text files that may be "
2717 "read and written using traditional shell utilities such as B<cat>(1), and "
2718 "B<echo>(1), or from a program by using file I/O library functions or system "
2719 "calls, such as B<open>(2), B<read>(2), B<write>(2), and B<close>(2)."
2722 #. ====================== tasks ======================
2724 #: build/C/man7/cpuset.7:168
2726 "The pseudo-files in a cpuset directory represent internal kernel state and "
2727 "do not have any persistent image on disk. Each of these per-cpuset files is "
2728 "listed and described below."
2732 #: build/C/man7/cpuset.7:168
2738 #: build/C/man7/cpuset.7:178
2740 "List of the process IDs (PIDs) of the processes in that cpuset. The list is "
2741 "formatted as a series of ASCII decimal numbers, each followed by a newline. "
2742 "A process may be added to a cpuset (automatically removing it from the "
2743 "cpuset that previously contained it) by writing its PID to that cpuset's "
2744 "I<tasks> file (with or without a trailing newline)."
2747 #. =================== notify_on_release ===================
2749 #: build/C/man7/cpuset.7:186
2751 "B<Warning:> only one PID may be written to the I<tasks> file at a time. If "
2752 "a string is written that contains more than one PID, only the first one will "
2757 #: build/C/man7/cpuset.7:186
2759 msgid "I<notify_on_release>"
2762 #. ====================== cpus ======================
2764 #: build/C/man7/cpuset.7:195
2766 "Flag (0 or 1). If set (1), that cpuset will receive special handling after "
2767 "it is released, that is, after all processes cease using it (i.e., terminate "
2768 "or are moved to a different cpuset) and all child cpuset directories have "
2769 "been removed. See the B<Notify On Release> section, below."
2773 #: build/C/man7/cpuset.7:195
2775 msgid "I<cpuset.cpus>"
2779 #: build/C/man7/cpuset.7:202
2781 "List of the physical numbers of the CPUs on which processes in that cpuset "
2782 "are allowed to execute. See B<List Format> below for a description of the "
2783 "format of I<cpus>."
2786 #. ==================== cpu_exclusive ====================
2788 #: build/C/man7/cpuset.7:208
2790 "The CPUs allowed to a cpuset may be changed by writing a new list to its "
2795 #: build/C/man7/cpuset.7:208
2797 msgid "I<cpuset.cpu_exclusive>"
2801 #: build/C/man7/cpuset.7:215
2803 "Flag (0 or 1). If set (1), the cpuset has exclusive use of its CPUs (no "
2804 "sibling or cousin cpuset may overlap CPUs). By default this is off (0). "
2805 "Newly created cpusets also initially default this to off (0)."
2808 #. ====================== mems ======================
2810 #: build/C/man7/cpuset.7:237
2812 "Two cpusets are I<sibling> cpusets if they share the same parent cpuset in "
2813 "the I</dev/cpuset> hierarchy. Two cpusets are I<cousin> cpusets if neither "
2814 "is the ancestor of the other. Regardless of the I<cpu_exclusive> setting, "
2815 "if one cpuset is the ancestor of another, and if both of these cpusets have "
2816 "nonempty I<cpus>, then their I<cpus> must overlap, because the I<cpus> of "
2817 "any cpuset are always a subset of the I<cpus> of its parent cpuset."
2821 #: build/C/man7/cpuset.7:237
2823 msgid "I<cpuset.mems>"
2826 #. ==================== mem_exclusive ====================
2828 #: build/C/man7/cpuset.7:245
2830 "List of memory nodes on which processes in this cpuset are allowed to "
2831 "allocate memory. See B<List Format> below for a description of the format "
2836 #: build/C/man7/cpuset.7:245
2838 msgid "I<cpuset.mem_exclusive>"
2842 #: build/C/man7/cpuset.7:253
2844 "Flag (0 or 1). If set (1), the cpuset has exclusive use of its memory nodes "
2845 "(no sibling or cousin may overlap). Also if set (1), the cpuset is a "
2846 "B<Hardwall> cpuset (see below). By default this is off (0). Newly created "
2847 "cpusets also initially default this to off (0)."
2850 #. ==================== mem_hardwall ====================
2852 #: build/C/man7/cpuset.7:261
2854 "Regardless of the I<mem_exclusive> setting, if one cpuset is the ancestor of "
2855 "another, then their memory nodes must overlap, because the memory nodes of "
2856 "any cpuset are always a subset of the memory nodes of that cpuset's parent "
2861 #: build/C/man7/cpuset.7:261
2863 msgid "I<cpuset.mem_hardwall> (since Linux 2.6.26)"
2866 #. ==================== memory_migrate ====================
2868 #: build/C/man7/cpuset.7:272
2870 "Flag (0 or 1). If set (1), the cpuset is a B<Hardwall> cpuset (see below). "
2871 "Unlike B<mem_exclusive>, there is no constraint on whether cpusets marked "
2872 "B<mem_hardwall> may have overlapping memory nodes with sibling or cousin "
2873 "cpusets. By default this is off (0). Newly created cpusets also initially "
2874 "default this to off (0)."
2878 #: build/C/man7/cpuset.7:272
2880 msgid "I<cpuset.memory_migrate> (since Linux 2.6.16)"
2883 #. ==================== memory_pressure ====================
2885 #: build/C/man7/cpuset.7:279
2887 "Flag (0 or 1). If set (1), then memory migration is enabled. By default "
2888 "this is off (0). See the B<Memory Migration> section, below."
2892 #: build/C/man7/cpuset.7:279
2894 msgid "I<cpuset.memory_pressure> (since Linux 2.6.16)"
2897 #. ================= memory_pressure_enabled =================
2899 #: build/C/man7/cpuset.7:292
2901 "A measure of how much memory pressure the processes in this cpuset are "
2902 "causing. See the B<Memory Pressure> section, below. Unless "
2903 "I<memory_pressure_enabled> is enabled, always has value zero (0). This file "
2904 "is read-only. See the B<WARNINGS> section, below."
2908 #: build/C/man7/cpuset.7:292
2910 msgid "I<cpuset.memory_pressure_enabled> (since Linux 2.6.16)"
2913 #. ================== memory_spread_page ==================
2915 #: build/C/man7/cpuset.7:304
2917 "Flag (0 or 1). This file is present only in the root cpuset, normally "
2918 "I</dev/cpuset>. If set (1), the I<memory_pressure> calculations are enabled "
2919 "for all cpusets in the system. By default this is off (0). See the "
2920 "B<Memory Pressure> section, below."
2924 #: build/C/man7/cpuset.7:304
2926 msgid "I<cpuset.memory_spread_page> (since Linux 2.6.17)"
2929 #. ================== memory_spread_slab ==================
2931 #: build/C/man7/cpuset.7:314
2933 "Flag (0 or 1). If set (1), pages in the kernel page cache (filesystem "
2934 "buffers) are uniformly spread across the cpuset. By default this is off (0) "
2935 "in the top cpuset, and inherited from the parent cpuset in newly created "
2936 "cpusets. See the B<Memory Spread> section, below."
2940 #: build/C/man7/cpuset.7:314
2942 msgid "I<cpuset.memory_spread_slab> (since Linux 2.6.17)"
2945 #. ================== sched_load_balance ==================
2947 #: build/C/man7/cpuset.7:325
2949 "Flag (0 or 1). If set (1), the kernel slab caches for file I/O (directory "
2950 "and inode structures) are uniformly spread across the cpuset. By default "
2951 "this is off (0) in the top cpuset, and inherited from the parent cpuset in "
2952 "newly created cpusets. See the B<Memory Spread> section, below."
2956 #: build/C/man7/cpuset.7:325
2958 msgid "I<cpuset.sched_load_balance> (since Linux 2.6.24)"
2961 #. ================== sched_relax_domain_level ==================
2963 #: build/C/man7/cpuset.7:339
2965 "Flag (0 or 1). If set (1, the default) the kernel will automatically load "
2966 "balance processes in that cpuset over the allowed CPUs in that cpuset. If "
2967 "cleared (0) the kernel will avoid load balancing processes in this cpuset, "
2968 "I<unless> some other cpuset with overlapping CPUs has its "
2969 "I<sched_load_balance> flag set. See B<Scheduler Load Balancing>, below, for "
2974 #: build/C/man7/cpuset.7:339
2976 msgid "I<cpuset.sched_relax_domain_level> (since Linux 2.6.26)"
2979 #. ================== proc cpuset ==================
2981 #: build/C/man7/cpuset.7:359
2983 "Integer, between -1 and a small positive value. The "
2984 "I<sched_relax_domain_level> controls the width of the range of CPUs over "
2985 "which the kernel scheduler performs immediate rebalancing of runnable tasks "
2986 "across CPUs. If I<sched_load_balance> is disabled, then the setting of "
2987 "I<sched_relax_domain_level> does not matter, as no such load balancing is "
2988 "done. If I<sched_load_balance> is enabled, then the higher the value of the "
2989 "I<sched_relax_domain_level>, the wider the range of CPUs over which "
2990 "immediate load balancing is attempted. See B<Scheduler Relax Domain Level>, "
2991 "below, for further details."
2994 #. ================== proc status ==================
2996 #: build/C/man7/cpuset.7:367
2998 "In addition to the above pseudo-files in each directory below "
2999 "I</dev/cpuset>, each process has a pseudo-file, "
3000 "I</proc/E<lt>pidE<gt>/cpuset>, that displays the path of the process's "
3001 "cpuset directory relative to the root of the cpuset filesystem."
3005 #: build/C/man7/cpuset.7:378
3007 "Also the I</proc/E<lt>pidE<gt>/status> file for each process has four added "
3008 "lines, displaying the process's I<Cpus_allowed> (on which CPUs it may be "
3009 "scheduled) and I<Mems_allowed> (on which memory nodes it may obtain memory), "
3010 "in the two formats B<Mask Format> and B<List Format> (see below) as shown "
3011 "in the following example:"
3015 #: build/C/man7/cpuset.7:385
3018 "Cpus_allowed: ffffffff,ffffffff,ffffffff,ffffffff\n"
3019 "Cpus_allowed_list: 0-127\n"
3020 "Mems_allowed: ffffffff,ffffffff\n"
3021 "Mems_allowed_list: 0-63\n"
3024 #. ================== EXTENDED CAPABILITIES ==================
3026 #: build/C/man7/cpuset.7:391
3028 "The \"allowed\" fields were added in Linux 2.6.24; the \"allowed_list\" "
3029 "fields were added in Linux 2.6.26."
3033 #: build/C/man7/cpuset.7:391
3035 msgid "EXTENDED CAPABILITIES"
3038 #. ================== Exclusive Cpusets ==================
3040 #: build/C/man7/cpuset.7:399
3042 "In addition to controlling which I<cpus> and I<mems> a process is allowed to "
3043 "use, cpusets provide the following extended capabilities."
3047 #: build/C/man7/cpuset.7:399
3049 msgid "Exclusive cpusets"
3053 #: build/C/man7/cpuset.7:406
3055 "If a cpuset is marked I<cpu_exclusive> or I<mem_exclusive>, no other cpuset, "
3056 "other than a direct ancestor or descendant, may share any of the same CPUs "
3060 #. ================== Hardwall ==================
3062 #: build/C/man7/cpuset.7:432
3064 "A cpuset that is I<mem_exclusive> restricts kernel allocations for buffer "
3065 "cache pages and other internal kernel data pages commonly shared by the "
3066 "kernel across multiple users. All cpusets, whether I<mem_exclusive> or not, "
3067 "restrict allocations of memory for user space. This enables configuring a "
3068 "system so that several independent jobs can share common kernel data, while "
3069 "isolating each job's user allocation in its own cpuset. To do this, "
3070 "construct a large I<mem_exclusive> cpuset to hold all the jobs, and "
3071 "construct child, non-I<mem_exclusive> cpusets for each individual job. Only "
3072 "a small amount of kernel memory, such as requests from interrupt handlers, "
3073 "is allowed to be placed on memory nodes outside even a I<mem_exclusive> "
3078 #: build/C/man7/cpuset.7:432
3084 #: build/C/man7/cpuset.7:447
3086 "A cpuset that has I<mem_exclusive> or I<mem_hardwall> set is a I<hardwall> "
3087 "cpuset. A I<hardwall> cpuset restricts kernel allocations for page, buffer, "
3088 "and other data commonly shared by the kernel across multiple users. All "
3089 "cpusets, whether I<hardwall> or not, restrict allocations of memory for user "
3094 #: build/C/man7/cpuset.7:458
3096 "This enables configuring a system so that several independent jobs can share "
3097 "common kernel data, such as filesystem pages, while isolating each job's "
3098 "user allocation in its own cpuset. To do this, construct a large "
3099 "I<hardwall> cpuset to hold all the jobs, and construct child cpusets for "
3100 "each individual job which are not I<hardwall> cpusets."
3103 #. ================== Notify On Release ==================
3105 #: build/C/man7/cpuset.7:464
3107 "Only a small amount of kernel memory, such as requests from interrupt "
3108 "handlers, is allowed to be taken outside even a I<hardwall> cpuset."
3112 #: build/C/man7/cpuset.7:464
3114 msgid "Notify on release"
3118 #: build/C/man7/cpuset.7:476
3120 "If the I<notify_on_release> flag is enabled (1) in a cpuset, then whenever "
3121 "the last process in the cpuset leaves (exits or attaches to some other "
3122 "cpuset) and the last child cpuset of that cpuset is removed, the kernel "
3123 "will run the command I</sbin/cpuset_release_agent>, supplying the pathname "
3124 "(relative to the mount point of the cpuset filesystem) of the abandoned "
3125 "cpuset. This enables automatic removal of abandoned cpusets."
3129 #: build/C/man7/cpuset.7:484
3131 "The default value of I<notify_on_release> in the root cpuset at system boot "
3132 "is disabled (0). The default value of other cpusets at creation is the "
3133 "current value of their parent's I<notify_on_release> setting."
3137 #: build/C/man7/cpuset.7:492
3139 "The command I</sbin/cpuset_release_agent> is invoked, with the name "
3140 "(I</dev/cpuset> relative path) of the to-be-released cpuset in I<argv[1]>."
3144 #: build/C/man7/cpuset.7:496
3146 "The usual contents of the command I</sbin/cpuset_release_agent> is simply "
3151 #: build/C/man7/cpuset.7:501
3155 "rmdir /dev/cpuset/$1\n"
3158 #. ================== Memory Pressure ==================
3160 #: build/C/man7/cpuset.7:509
3162 "As with other flag values below, this flag can be changed by writing an "
3163 "ASCII number 0 or 1 (with optional trailing newline) into the file, to "
3164 "clear or set the flag, respectively."
3168 #: build/C/man7/cpuset.7:509
3170 msgid "Memory pressure"
3174 #: build/C/man7/cpuset.7:515
3176 "The I<memory_pressure> of a cpuset provides a simple per-cpuset running "
3177 "average of the rate that the processes in a cpuset are attempting to free up "
3178 "in-use memory on the nodes of the cpuset to satisfy additional memory "
3183 #: build/C/man7/cpuset.7:519
3185 "This enables batch managers that are monitoring jobs running in dedicated "
3186 "cpusets to efficiently detect what level of memory pressure that job is "
3191 #: build/C/man7/cpuset.7:526
3193 "This is useful both on tightly managed systems running a wide mix of "
3194 "submitted jobs, which may choose to terminate or reprioritize jobs that are "
3195 "trying to use more memory than allowed on the nodes assigned them, and with "
3196 "tightly coupled, long-running, massively parallel scientific computing jobs "
3197 "that will dramatically fail to meet required performance goals if they start "
3198 "to use more memory than allowed to them."
3202 #: build/C/man7/cpuset.7:531
3204 "This mechanism provides a very economical way for the batch manager to "
3205 "monitor a cpuset for signs of memory pressure. It's up to the batch manager "
3206 "or other user code to decide what action to take if it detects signs of "
3211 #: build/C/man7/cpuset.7:538
3213 "Unless memory pressure calculation is enabled by setting the pseudo-file "
3214 "I</dev/cpuset/cpuset.memory_pressure_enabled>, it is not computed for any "
3215 "cpuset, and reads from any I<memory_pressure> always return zero, as "
3216 "represented by the ASCII string \"0\\en\". See the B<WARNINGS> section, "
3221 #: build/C/man7/cpuset.7:540
3222 msgid "A per-cpuset, running average is employed for the following reasons:"
3226 #: build/C/man7/cpuset.7:545
3228 "Because this meter is per-cpuset rather than per-process or per virtual "
3229 "memory region, the system load imposed by a batch scheduler monitoring this "
3230 "metric is sharply reduced on large systems, because a scan of the tasklist "
3231 "can be avoided on each set of queries."
3235 #: build/C/man7/cpuset.7:550
3237 "Because this meter is a running average rather than an accumulating counter, "
3238 "a batch scheduler can detect memory pressure with a single read, instead of "
3239 "having to read and accumulate results for a period of time."
3243 #: build/C/man7/cpuset.7:556
3245 "Because this meter is per-cpuset rather than per-process, the batch "
3246 "scheduler can obtain the key information\\(emmemory pressure in a "
3247 "cpuset\\(emwith a single read, rather than having to query and accumulate "
3248 "results over all the (dynamically changing) set of processes in the cpuset."
3252 #: build/C/man7/cpuset.7:564
3254 "The I<memory_pressure> of a cpuset is calculated using a per-cpuset simple "
3255 "digital filter that is kept within the kernel. For each cpuset, this filter "
3256 "tracks the recent rate at which processes attached to that cpuset enter the "
3257 "kernel direct reclaim code."
3261 #: build/C/man7/cpuset.7:573
3263 "The kernel direct reclaim code is entered whenever a process has to satisfy "
3264 "a memory page request by first finding some other page to repurpose, due to "
3265 "lack of any readily available already free pages. Dirty filesystem pages "
3266 "are repurposed by first writing them to disk. Unmodified filesystem buffer "
3267 "pages are repurposed by simply dropping them, though if that page is needed "
3268 "again, it will have to be reread from disk."
3271 #. ================== Memory Spread ==================
3273 #: build/C/man7/cpuset.7:581
3275 "The I<cpuset.memory_pressure> file provides an integer number representing "
3276 "the recent (half-life of 10 seconds) rate of entries to the direct reclaim "
3277 "code caused by any process in the cpuset, in units of reclaims attempted per "
3278 "second, times 1000."
3282 #: build/C/man7/cpuset.7:581
3284 msgid "Memory spread"
3288 #: build/C/man7/cpuset.7:589
3290 "There are two Boolean flag files per cpuset that control where the kernel "
3291 "allocates pages for the filesystem buffers and related in-kernel data "
3292 "structures. They are called I<cpuset.memory_spread_page> and "
3293 "I<cpuset.memory_spread_slab>."
3297 #: build/C/man7/cpuset.7:596
3299 "If the per-cpuset Boolean flag file I<cpuset.memory_spread_page> is set, "
3300 "then the kernel will spread the filesystem buffers (page cache) evenly over "
3301 "all the nodes that the faulting process is allowed to use, instead of "
3302 "preferring to put those pages on the node where the process is running."
3306 #: build/C/man7/cpuset.7:604
3308 "If the per-cpuset Boolean flag file I<cpuset.memory_spread_slab> is set, "
3309 "then the kernel will spread some filesystem-related slab caches, such as "
3310 "those for inodes and directory entries, evenly over all the nodes that the "
3311 "faulting process is allowed to use, instead of preferring to put those pages "
3312 "on the node where the process is running."
3316 #: build/C/man7/cpuset.7:609
3318 "The setting of these flags does not affect the data segment (see B<brk>(2)) "
3319 "or stack segment pages of a process."
3323 #: build/C/man7/cpuset.7:617
3325 "By default, both kinds of memory spreading are off and the kernel prefers to "
3326 "allocate memory pages on the node local to where the requesting process is "
3327 "running. If that node is not allowed by the process's NUMA memory policy or "
3328 "cpuset configuration or if there are insufficient free memory pages on that "
3329 "node, then the kernel looks for the nearest node that is allowed and has "
3330 "sufficient free memory."
3334 #: build/C/man7/cpuset.7:620
3336 "When new cpusets are created, they inherit the memory spread settings of "
3341 #: build/C/man7/cpuset.7:635
3343 "Setting memory spreading causes allocations for the affected page or slab "
3344 "caches to ignore the process's NUMA memory policy and be spread instead. "
3345 "However, the effect of these changes in memory placement caused by "
3346 "cpuset-specified memory spreading is hidden from the B<mbind>(2) or "
3347 "B<set_mempolicy>(2) calls. These two NUMA memory policy calls always "
3348 "appear to behave as if no cpuset-specified memory spreading is in effect, "
3349 "even if it is. If cpuset memory spreading is subsequently turned off, the "
3350 "NUMA memory policy most recently specified by these calls is automatically "
3355 #: build/C/man7/cpuset.7:644
3357 "Both I<cpuset.memory_spread_page> and I<cpuset.memory_spread_slab> are "
3358 "Boolean flag files. By default they contain \"0\", meaning that the feature "
3359 "is off for that cpuset. If a \"1\" is written to that file, that turns the "
3364 #: build/C/man7/cpuset.7:647
3366 "Cpuset-specified memory spreading behaves similarly to what is known (in "
3367 "other contexts) as round-robin or interleave memory placement."
3371 #: build/C/man7/cpuset.7:650
3373 "Cpuset-specified memory spreading can provide substantial performance "
3374 "improvements for jobs that:"
3378 #: build/C/man7/cpuset.7:650 build/C/man7/user_namespaces.7:384
3384 #: build/C/man7/cpuset.7:654
3386 "need to place thread-local data on memory nodes close to the CPUs which are "
3387 "running the threads that most frequently access that data; but also"
3391 #: build/C/man7/cpuset.7:654 build/C/man7/user_namespaces.7:389
3397 #: build/C/man7/cpuset.7:657
3399 "need to access large filesystem data sets that must be spread across the "
3400 "several nodes in the job's cpuset in order to fit."
3403 #. ================== Memory Migration ==================
3405 #: build/C/man7/cpuset.7:664
3407 "Without this policy, the memory allocation across the nodes in the job's "
3408 "cpuset can become very uneven, especially for jobs that might have just a "
3409 "single thread initializing or reading in the data set."
3413 #: build/C/man7/cpuset.7:664
3415 msgid "Memory migration"
3419 #: build/C/man7/cpuset.7:673
3421 "Normally, under the default setting (disabled) of I<cpuset.memory_migrate>, "
3422 "once a page is allocated (given a physical page of main memory), then that "
3423 "page stays on whatever node it was allocated on, so long as it remains "
3424 "allocated, even if the cpuset's memory-placement policy I<mems> subsequently "
3429 #: build/C/man7/cpuset.7:679
3431 "When memory migration is enabled in a cpuset, if the I<mems> setting of the "
3432 "cpuset is changed, then any memory page in use by any process in the cpuset "
3433 "that is on a memory node that is no longer allowed will be migrated to a "
3434 "memory node that is allowed."
3438 #: build/C/man7/cpuset.7:685
3440 "Furthermore, if a process is moved into a cpuset with I<memory_migrate> "
3441 "enabled, any memory pages it uses that were on memory nodes allowed in its "
3442 "previous cpuset, but which are not allowed in its new cpuset, will be "
3443 "migrated to a memory node allowed in the new cpuset."
3446 #. ================== Scheduler Load Balancing ==================
3448 #: build/C/man7/cpuset.7:693
3450 "The relative placement of a migrated page within the cpuset is preserved "
3451 "during these migration operations if possible. For example, if the page was "
3452 "on the second valid node of the prior cpuset, then the page will be placed "
3453 "on the second valid node of the new cpuset, if possible."
3457 #: build/C/man7/cpuset.7:693
3459 msgid "Scheduler load balancing"
3463 #: build/C/man7/cpuset.7:700
3465 "The kernel scheduler automatically load balances processes. If one CPU is "
3466 "underutilized, the kernel will look for processes on other more overloaded "
3467 "CPUs and move those processes to the underutilized CPU, within the "
3468 "constraints of such placement mechanisms as cpusets and "
3469 "B<sched_setaffinity>(2)."
3473 #: build/C/man7/cpuset.7:713
3475 "The algorithmic cost of load balancing and its impact on key shared kernel "
3476 "data structures such as the process list increases more than linearly with "
3477 "the number of CPUs being balanced. For example, it costs more to load "
3478 "balance across one large set of CPUs than it does to balance across two "
3479 "smaller sets of CPUs, each of half the size of the larger set. (The precise "
3480 "relationship between the number of CPUs being balanced and the cost of load "
3481 "balancing depends on implementation details of the kernel process scheduler, "
3482 "which is subject to change over time, as improved kernel scheduler "
3483 "algorithms are implemented.)"
3487 #: build/C/man7/cpuset.7:719
3489 "The per-cpuset flag I<sched_load_balance> provides a mechanism to suppress "
3490 "this automatic scheduler load balancing in cases where it is not needed and "
3491 "suppressing it would have worthwhile performance benefits."
3495 #: build/C/man7/cpuset.7:723
3497 "By default, load balancing is done across all CPUs, except those marked "
3498 "isolated using the kernel boot time \"isolcpus=\" argument. (See "
3499 "B<Scheduler Relax Domain Level>, below, to change this default.)"
3503 #: build/C/man7/cpuset.7:726
3505 "This default load balancing across all CPUs is not well suited to the "
3506 "following two situations:"
3510 #: build/C/man7/cpuset.7:730
3512 "On large systems, load balancing across many CPUs is expensive. If the "
3513 "system is managed using cpusets to place independent jobs on separate sets "
3514 "of CPUs, full load balancing is unnecessary."
3518 #: build/C/man7/cpuset.7:734
3520 "Systems supporting real-time on some CPUs need to minimize system overhead "
3521 "on those CPUs, including avoiding process load balancing if that is not "
3526 #: build/C/man7/cpuset.7:744
3528 "When the per-cpuset flag I<sched_load_balance> is enabled (the default "
3529 "setting), it requests load balancing across all the CPUs in that cpuset's "
3530 "allowed CPUs, ensuring that load balancing can move a process (not otherwise "
3531 "pinned, as by B<sched_setaffinity>(2)) from any CPU in that cpuset to any "
3536 #: build/C/man7/cpuset.7:753
3538 "When the per-cpuset flag I<sched_load_balance> is disabled, then the "
3539 "scheduler will avoid load balancing across the CPUs in that cpuset, "
3540 "I<except> in so far as is necessary because some overlapping cpuset has "
3541 "I<sched_load_balance> enabled."
3545 #: build/C/man7/cpuset.7:761
3547 "So, for example, if the top cpuset has the flag I<sched_load_balance> "
3548 "enabled, then the scheduler will load balance across all CPUs, and the "
3549 "setting of the I<sched_load_balance> flag in other cpusets has no effect, as "
3550 "we're already fully load balancing."
3554 #: build/C/man7/cpuset.7:766
3556 "Therefore in the above two situations, the flag I<sched_load_balance> should "
3557 "be disabled in the top cpuset, and only some of the smaller, child cpusets "
3558 "would have this flag enabled."
3562 #: build/C/man7/cpuset.7:774
3564 "When doing this, you don't usually want to leave any unpinned processes in "
3565 "the top cpuset that might use nontrivial amounts of CPU, as such processes "
3566 "may be artificially constrained to some subset of CPUs, depending on the "
3567 "particulars of this flag setting in descendant cpusets. Even if such a "
3568 "process could use spare CPU cycles in some other CPUs, the kernel scheduler "
3569 "might not consider the possibility of load balancing that process to the "
3573 #. ================== Scheduler Relax Domain Level ==================
3575 #: build/C/man7/cpuset.7:780
3577 "Of course, processes pinned to a particular CPU can be left in a cpuset that "
3578 "disables I<sched_load_balance> as those processes aren't going anywhere else "
3583 #: build/C/man7/cpuset.7:780
3585 msgid "Scheduler relax domain level"
3589 #: build/C/man7/cpuset.7:801
3591 "The kernel scheduler performs immediate load balancing whenever a CPU "
3592 "becomes free or another task becomes runnable. This load balancing works to "
3593 "ensure that as many CPUs as possible are usefully employed running tasks. "
3594 "The kernel also performs periodic load balancing off the software clock "
3595 "described in B<time>(7). The setting of I<sched_relax_domain_level> applies "
3596 "only to immediate load balancing. Regardless of the "
3597 "I<sched_relax_domain_level> setting, periodic load balancing is attempted "
3598 "over all CPUs (unless disabled by turning off I<sched_load_balance>.) In "
3599 "any case, of course, tasks will be scheduled to run only on CPUs allowed by "
3600 "their cpuset, as modified by B<sched_setaffinity>(2) system calls."
3604 #: build/C/man7/cpuset.7:809
3606 "On small systems, such as those with just a few CPUs, immediate load "
3607 "balancing is useful to improve system interactivity and to minimize wasteful "
3608 "idle CPU cycles. But on large systems, attempting immediate load balancing "
3609 "across a large number of CPUs can be more costly than it is worth, depending "
3610 "on the particular performance characteristics of the job mix and the "
3615 #: build/C/man7/cpuset.7:817
3617 "The exact meaning of the small integer values of I<sched_relax_domain_level> "
3618 "will depend on internal implementation details of the kernel scheduler code "
3619 "and on the non-uniform architecture of the hardware. Both of these will "
3620 "evolve over time and vary by system architecture and kernel version."
3624 #: build/C/man7/cpuset.7:822
3626 "As of this writing, when this capability was introduced in Linux 2.6.26, on "
3627 "certain popular architectures, the positive values of "
3628 "I<sched_relax_domain_level> have the following meanings."
3632 #: build/C/man7/cpuset.7:824
3638 #: build/C/man7/cpuset.7:827
3640 "Perform immediate load balancing across Hyper-Thread siblings on the same "
3645 #: build/C/man7/cpuset.7:827
3651 #: build/C/man7/cpuset.7:829
3652 msgid "Perform immediate load balancing across other cores in the same package."
3656 #: build/C/man7/cpuset.7:829
3662 #: build/C/man7/cpuset.7:832
3664 "Perform immediate load balancing across other CPUs on the same node or "
3669 #: build/C/man7/cpuset.7:832
3675 #: build/C/man7/cpuset.7:835
3677 "Perform immediate load balancing across several (implementation detail) "
3678 "nodes [On NUMA systems]."
3682 #: build/C/man7/cpuset.7:835
3688 #: build/C/man7/cpuset.7:838
3690 "Perform immediate load balancing across all CPUs in system [On NUMA "
3695 #: build/C/man7/cpuset.7:847
3697 "The I<sched_relax_domain_level> value of zero (0) always means don't perform "
3698 "immediate load balancing, hence that load balancing is done only "
3699 "periodically, not immediately when a CPU becomes available or another task "
3704 #: build/C/man7/cpuset.7:855
3706 "The I<sched_relax_domain_level> value of minus one (-1) always means use "
3707 "the system default value. The system default value can vary by architecture "
3708 "and kernel version. This system default value can be changed by the kernel "
3709 "boot-time \"relax_domain_level=\" argument."
3713 #: build/C/man7/cpuset.7:863
3715 "In the case of multiple overlapping cpusets which have conflicting "
3716 "I<sched_relax_domain_level> values, then the highest such value applies to "
3717 "all CPUs in any of the overlapping cpusets. In such cases, the value "
3718 "B<minus one (-1)> is the lowest value, overridden by any other value, and "
3719 "the value B<zero (0)> is the next lowest value."
3723 #: build/C/man7/cpuset.7:863
3728 #. ================== Mask Format ==================
3730 #: build/C/man7/cpuset.7:867
3731 msgid "The following formats are used to represent sets of CPUs and memory nodes."
3735 #: build/C/man7/cpuset.7:867
3741 #: build/C/man7/cpuset.7:872
3743 "The B<Mask Format> is used to represent CPU and memory-node bit masks in the "
3744 "I</proc/E<lt>pidE<gt>/status> file."
3748 #: build/C/man7/cpuset.7:880
3750 "This format displays each 32-bit word in hexadecimal (using ASCII characters "
3751 "\"0\" - \"9\" and \"a\" - \"f\"); words are filled with leading zeros, if "
3752 "required. For masks longer than one word, a comma separator is used between "
3753 "words. Words are displayed in big-endian order, which has the most "
3754 "significant bit first. The hex digits within a word are also in big-endian "
3759 #: build/C/man7/cpuset.7:883
3761 "The number of 32-bit words displayed is the minimum number needed to display "
3762 "all bits of the bit mask, based on the size of the bit mask."
3766 #: build/C/man7/cpuset.7:885
3767 msgid "Examples of the B<Mask Format>:"
3771 #: build/C/man7/cpuset.7:893
3774 "00000001 # just bit 0 set\n"
3775 "40000000,00000000,00000000 # just bit 94 set\n"
3776 "00000001,00000000,00000000 # just bit 64 set\n"
3777 "000000ff,00000000 # bits 32-39 set\n"
3778 "00000000,000e3862 # 1,5,6,11-13,17-19 set\n"
3782 #: build/C/man7/cpuset.7:897
3783 msgid "A mask with bits 0, 1, 2, 4, 8, 16, 32, and 64 set displays as:"
3787 #: build/C/man7/cpuset.7:901
3789 msgid "00000001,00000001,00010117\n"
3792 #. ================== List Format ==================
3794 #: build/C/man7/cpuset.7:908
3796 "The first \"1\" is for bit 64, the second for bit 32, the third for bit 16, "
3797 "the fourth for bit 8, the fifth for bit 4, and the \"7\" is for bits 2, 1, "
3802 #: build/C/man7/cpuset.7:908
3808 #: build/C/man7/cpuset.7:915
3810 "The B<List Format> for I<cpus> and I<mems> is a comma-separated list of CPU "
3811 "or memory-node numbers and ranges of numbers, in ASCII decimal."
3815 #: build/C/man7/cpuset.7:917
3816 msgid "Examples of the B<List Format>:"
3820 #: build/C/man7/cpuset.7:922
3823 "0-4,9 # bits 0, 1, 2, 3, 4, and 9 set\n"
3824 "0-2,7,12-14 # bits 0, 1, 2, 7, 12, 13, and 14 set\n"
3827 #. ================== RULES ==================
3829 #: build/C/man7/cpuset.7:925
3835 #: build/C/man7/cpuset.7:927
3836 msgid "The following rules apply to each cpuset:"
3840 #: build/C/man7/cpuset.7:930
3842 "Its CPUs and memory nodes must be a (possibly equal) subset of its "
3847 #: build/C/man7/cpuset.7:934
3848 msgid "It can be marked I<cpu_exclusive> only if its parent is."
3852 #: build/C/man7/cpuset.7:938
3853 msgid "It can be marked I<mem_exclusive> only if its parent is."
3857 #: build/C/man7/cpuset.7:942
3858 msgid "If it is I<cpu_exclusive>, its CPUs may not overlap any sibling."
3861 #. ================== PERMISSIONS ==================
3863 #: build/C/man7/cpuset.7:947
3864 msgid "If it is I<memory_exclusive>, its memory nodes may not overlap any sibling."
3868 #: build/C/man7/cpuset.7:947
3874 #: build/C/man7/cpuset.7:952
3876 "The permissions of a cpuset are determined by the permissions of the "
3877 "directories and pseudo-files in the cpuset filesystem, normally mounted at "
3882 #: build/C/man7/cpuset.7:961
3884 "For instance, a process can put itself in some other cpuset (than its "
3885 "current one) if it can write the I<tasks> file for that cpuset. This "
3886 "requires execute permission on the encompassing directories and write "
3887 "permission on the I<tasks> file."
3891 #: build/C/man7/cpuset.7:968
3893 "An additional constraint is applied to requests to place some other process "
3894 "in a cpuset. One process may not attach another to a cpuset unless it would "
3895 "have permission to send that process a signal (see B<kill>(2))."
3899 #: build/C/man7/cpuset.7:979
3901 "A process may create a child cpuset if it can access and write the parent "
3902 "cpuset directory. It can modify the CPUs or memory nodes in a cpuset if it "
3903 "can access that cpuset's directory (execute permissions on each of the "
3904 "parent directories) and write the corresponding I<cpus> or I<mems> file."
3908 #: build/C/man7/cpuset.7:1000
3910 "There is one minor difference between the manner in which these permissions "
3911 "are evaluated and the manner in which normal filesystem operation "
3912 "permissions are evaluated. The kernel interprets relative pathnames "
3913 "starting at a process's current working directory. Even if one is operating "
3914 "on a cpuset file, relative pathnames are interpreted relative to the "
3915 "process's current working directory, not relative to the process's current "
3916 "cpuset. The only ways that cpuset paths relative to a process's current "
3917 "cpuset can be used are if either the process's current working directory is "
3918 "its cpuset (it first did a B<cd> or B<chdir>(2) to its cpuset directory "
3919 "beneath I</dev/cpuset>, which is a bit unusual) or if some user code "
3920 "converts the relative cpuset path to a full filesystem path."
3923 #. ================== WARNINGS ==================
3925 #: build/C/man7/cpuset.7:1015
3927 "In theory, this means that user code should specify cpusets using absolute "
3928 "pathnames, which requires knowing the mount point of the cpuset filesystem "
3929 "(usually, but not necessarily, I</dev/cpuset>). In practice, all user level "
3930 "code that this author is aware of simply assumes that if the cpuset "
3931 "filesystem is mounted, then it is mounted at I</dev/cpuset>. Furthermore, "
3932 "it is common practice for carefully written user code to verify the presence "
3933 "of the pseudo-file I</dev/cpuset/tasks> in order to verify that the cpuset "
3934 "pseudo-filesystem is currently mounted."
3938 #: build/C/man7/cpuset.7:1015
3944 #: build/C/man7/cpuset.7:1016
3946 msgid "Enabling memory_pressure"
3950 #: build/C/man7/cpuset.7:1025
3952 "By default, the per-cpuset file I<cpuset.memory_pressure> always contains "
3953 "zero (0). Unless this feature is enabled by writing \"1\" to the "
3954 "pseudo-file I</dev/cpuset/cpuset.memory_pressure_enabled>, the kernel does "
3955 "not compute per-cpuset I<memory_pressure>."
3959 #: build/C/man7/cpuset.7:1025
3961 msgid "Using the echo command"
3964 #. Gack! csh(1)'s echo does this
3966 #: build/C/man7/cpuset.7:1036
3968 "When using the B<echo> command at the shell prompt to change the values of "
3969 "cpuset files, beware that the built-in B<echo> command in some shells does "
3970 "not display an error message if the B<write>(2) system call fails. For "
3971 "example, if the command:"
3975 #: build/C/man7/cpuset.7:1040
3977 msgid "echo 19 E<gt> cpuset.mems\n"
3981 #: build/C/man7/cpuset.7:1053
3983 "failed because memory node 19 was not allowed (perhaps the current system "
3984 "does not have a memory node 19), then the B<echo> command might not display "
3985 "any error. It is better to use the B</bin/echo> external command to change "
3986 "cpuset file settings, as this command will display B<write>(2) errors, as "
3991 #: build/C/man7/cpuset.7:1058
3994 "/bin/echo 19 E<gt> cpuset.mems\n"
3995 "/bin/echo: write error: Invalid argument\n"
3998 #. ================== EXCEPTIONS ==================
4000 #: build/C/man7/cpuset.7:1061
4006 #: build/C/man7/cpuset.7:1062
4008 msgid "Memory placement"
4012 #: build/C/man7/cpuset.7:1065
4014 "Not all allocations of system memory are constrained by cpusets, for the "
4015 "following reasons."
4019 #: build/C/man7/cpuset.7:1080
4021 "If hot-plug functionality is used to remove all the CPUs that are currently "
4022 "assigned to a cpuset, then the kernel will automatically update the "
4023 "I<cpus_allowed> of all processes attached to CPUs in that cpuset to allow "
4024 "all CPUs. When memory hot-plug functionality for removing memory nodes is "
4025 "available, a similar exception is expected to apply there as well. In "
4026 "general, the kernel prefers to violate cpuset placement, rather than "
4027 "starving a process that has had all its allowed CPUs or memory nodes taken "
4028 "offline. User code should reconfigure cpusets to refer only to online CPUs "
4029 "and memory nodes when using hot-plug to add or remove such resources."
4033 #: build/C/man7/cpuset.7:1088
4035 "A few kernel-critical, internal memory-allocation requests, marked "
4036 "GFP_ATOMIC, must be satisfied immediately. The kernel may drop some request "
4037 "or malfunction if one of these allocations fails. If such a request cannot "
4038 "be satisfied within the current process's cpuset, then we relax the cpuset, "
4039 "and look for memory anywhere we can find it. It's better to violate the "
4040 "cpuset than stress the kernel."
4044 #: build/C/man7/cpuset.7:1092
4046 "Allocations of memory requested by kernel drivers while processing an "
4047 "interrupt lack any relevant process context, and are not confined by "
4052 #: build/C/man7/cpuset.7:1092
4054 msgid "Renaming cpusets"
4057 #. ================== ERRORS ==================
4059 #: build/C/man7/cpuset.7:1100
4061 "You can use the B<rename>(2) system call to rename cpusets. Only simple "
4062 "renaming is supported; that is, changing the name of a cpuset directory is "
4063 "permitted, but moving a directory into a different directory is not "
4068 #: build/C/man7/cpuset.7:1104
4070 "The Linux kernel implementation of cpusets sets I<errno> to specify the "
4071 "reason for a failed system call affecting cpusets."
4075 #: build/C/man7/cpuset.7:1109
4077 "The possible I<errno> settings and their meaning when set on a failed cpuset "
4078 "call are as listed below."
4082 #: build/C/man7/cpuset.7:1109
4088 #: build/C/man7/cpuset.7:1116
4090 "Attempted a B<write>(2) on a special cpuset file with a length larger than "
4091 "some kernel-determined upper limit on the length of such writes."
4095 #: build/C/man7/cpuset.7:1123
4097 "Attempted to B<write>(2) the process ID (PID) of a process to a cpuset "
4098 "I<tasks> file when one lacks permission to move that process."
4102 #: build/C/man7/cpuset.7:1129
4104 "Attempted to add, using B<write>(2), a CPU or memory node to a cpuset, when "
4105 "that CPU or memory node was not already in its parent."
4109 #: build/C/man7/cpuset.7:1137
4111 "Attempted to set, using B<write>(2), I<cpuset.cpu_exclusive> or "
4112 "I<cpuset.mem_exclusive> on a cpuset whose parent lacks the same setting."
4116 #: build/C/man7/cpuset.7:1144
4117 msgid "Attempted to B<write>(2) a I<cpuset.memory_pressure> file."
4121 #: build/C/man7/cpuset.7:1147
4122 msgid "Attempted to create a file in a cpuset directory."
4126 #: build/C/man7/cpuset.7:1147 build/C/man7/cpuset.7:1152 build/C/man7/cpuset.7:1157
4132 #: build/C/man7/cpuset.7:1152
4133 msgid "Attempted to remove, using B<rmdir>(2), a cpuset with attached processes."
4137 #: build/C/man7/cpuset.7:1157
4138 msgid "Attempted to remove, using B<rmdir>(2), a cpuset with child cpusets."
4142 #: build/C/man7/cpuset.7:1162
4144 "Attempted to remove a CPU or memory node from a cpuset that is also in a "
4145 "child of that cpuset."
4149 #: build/C/man7/cpuset.7:1162 build/C/man7/cpuset.7:1167
4155 #: build/C/man7/cpuset.7:1167
4156 msgid "Attempted to create, using B<mkdir>(2), a cpuset that already exists."
4160 #: build/C/man7/cpuset.7:1172
4161 msgid "Attempted to B<rename>(2) a cpuset to a name that already exists."
4165 #: build/C/man7/cpuset.7:1180
4167 "Attempted to B<read>(2) or B<write>(2) a cpuset file using a buffer that "
4168 "is outside the writing process's accessible address space."
4172 #: build/C/man7/cpuset.7:1189
4174 "Attempted to change a cpuset, using B<write>(2), in a way that would violate "
4175 "a I<cpu_exclusive> or I<mem_exclusive> attribute of that cpuset or any of "
4180 #: build/C/man7/cpuset.7:1198
4182 "Attempted to B<write>(2) an empty I<cpuset.cpus> or I<cpuset.mems> list to "
4183 "a cpuset which has attached processes or child cpusets."
4187 #: build/C/man7/cpuset.7:1208
4189 "Attempted to B<write>(2) a I<cpuset.cpus> or I<cpuset.mems> list which "
4190 "included a range with the second number smaller than the first number."
4194 #: build/C/man7/cpuset.7:1217
4196 "Attempted to B<write>(2) a I<cpuset.cpus> or I<cpuset.mems> list which "
4197 "included an invalid character in the string."
4201 #: build/C/man7/cpuset.7:1224
4203 "Attempted to B<write>(2) a list to a I<cpuset.cpus> file that did not "
4204 "include any online CPUs."
4208 #: build/C/man7/cpuset.7:1231
4210 "Attempted to B<write>(2) a list to a I<cpuset.mems> file that did not "
4211 "include any online memory nodes."
4215 #: build/C/man7/cpuset.7:1238
4217 "Attempted to B<write>(2) a list to a I<cpuset.mems> file that included a "
4218 "node that held no memory."
4222 #: build/C/man7/cpuset.7:1246
4224 "Attempted to B<write>(2) a string to a cpuset I<tasks> file that does not "
4225 "begin with an ASCII decimal integer."
4229 #: build/C/man7/cpuset.7:1251
4230 msgid "Attempted to B<rename>(2) a cpuset into a different directory."
4234 #: build/C/man7/cpuset.7:1258
4236 "Attempted to B<read>(2) a I</proc/E<lt>pidE<gt>/cpuset> file for a cpuset "
4237 "path that is longer than the kernel page size."
4241 #: build/C/man7/cpuset.7:1263
4243 "Attempted to create, using B<mkdir>(2), a cpuset whose base directory name "
4244 "is longer than 255 characters."
4248 #: build/C/man7/cpuset.7:1270
4250 "Attempted to create, using B<mkdir>(2), a cpuset whose full pathname, "
4251 "including the mount point (typically \"/dev/cpuset/\") prefix, is longer "
4252 "than 4095 characters."
4256 #: build/C/man7/cpuset.7:1270
4262 #: build/C/man7/cpuset.7:1275
4264 "The cpuset was removed by another process at the same time as a B<write>(2) "
4265 "was attempted on one of the pseudo-files in the cpuset directory."
4269 #: build/C/man7/cpuset.7:1280
4271 "Attempted to create, using B<mkdir>(2), a cpuset in a parent cpuset that "
4276 #: build/C/man7/cpuset.7:1287
4278 "Attempted to B<access>(2) or B<open>(2) a nonexistent file in a cpuset "
4283 #: build/C/man7/cpuset.7:1292
4285 "Insufficient memory is available within the kernel; can occur on a variety "
4286 "of system calls affecting cpusets, but only if the system is extremely short "
4291 #: build/C/man7/cpuset.7:1292 build/C/man7/cpuset.7:1304
4297 #: build/C/man7/cpuset.7:1304
4299 "Attempted to B<write>(2) the process ID (PID) of a process to a cpuset "
4300 "I<tasks> file when the cpuset had an empty I<cpuset.cpus> or empty "
4301 "I<cpuset.mems> setting."
4305 #: build/C/man7/cpuset.7:1314
4307 "Attempted to B<write>(2) an empty I<cpuset.cpus> or I<cpuset.mems> setting "
4308 "to a cpuset that has tasks attached."
4312 #: build/C/man7/cpuset.7:1319
4313 msgid "Attempted to B<rename>(2) a nonexistent cpuset."
4317 #: build/C/man7/cpuset.7:1322
4318 msgid "Attempted to remove a file from a cpuset directory."
4322 #: build/C/man7/cpuset.7:1322
4328 #: build/C/man7/cpuset.7:1330
4330 "Specified a I<cpuset.cpus> or I<cpuset.mems> list to the kernel which "
4331 "included a number too large for the kernel to set in its bit masks."
4334 #. ================== VERSIONS ==================
4336 #: build/C/man7/cpuset.7:1338
4338 "Attempted to B<write>(2) the process ID (PID) of a nonexistent process to a "
4339 "cpuset I<tasks> file."
4342 #. ================== NOTES ==================
4344 #: build/C/man7/cpuset.7:1341
4345 msgid "Cpusets appeared in version 2.6.12 of the Linux kernel."
4348 #. ================== BUGS ==================
4350 #: build/C/man7/cpuset.7:1352
4352 "Despite its name, the I<pid> parameter is actually a thread ID, and each "
4353 "thread in a threaded group can be attached to a different cpuset. The value "
4354 "returned from a call to B<gettid>(2) can be passed in the argument I<pid>."
4358 #: build/C/man7/cpuset.7:1352 build/C/man2/getpriority.2:225 build/C/man2/getrlimit.2:577 build/C/man2/ioprio_set.2:337 build/C/man2/setfsgid.2:106 build/C/man2/setfsuid.2:114
4363 #. ================== EXAMPLE ==================
4365 #: build/C/man7/cpuset.7:1365
4367 "I<cpuset.memory_pressure> cpuset files can be opened for writing, creation, "
4368 "or truncation, but then the B<write>(2) fails with I<errno> set to "
4369 "B<EACCES>, and the creation and truncation options on B<open>(2) have no "
4374 #: build/C/man7/cpuset.7:1365 build/C/man2/getrlimit.2:710 build/C/man7/namespaces.7:361 build/C/man7/pid_namespaces.7:353 build/C/man7/user_namespaces.7:677 build/C/man2/seccomp.2:476
4380 #: build/C/man7/cpuset.7:1368
4382 "The following examples demonstrate querying and setting cpuset options using "
4387 #: build/C/man7/cpuset.7:1368
4389 msgid "Creating and attaching to a cpuset."
4393 #: build/C/man7/cpuset.7:1371
4395 "To create a new cpuset and attach the current command shell to it, the steps "
4400 #: build/C/man7/cpuset.7:1373 build/C/man7/cpuset.7:1412
4406 #: build/C/man7/cpuset.7:1375
4407 msgid "mkdir /dev/cpuset (if not already done)"
4411 #: build/C/man7/cpuset.7:1375 build/C/man7/cpuset.7:1418
4417 #: build/C/man7/cpuset.7:1377
4418 msgid "mount -t cpuset none /dev/cpuset (if not already done)"
4422 #: build/C/man7/cpuset.7:1377 build/C/man7/cpuset.7:1421
4428 #: build/C/man7/cpuset.7:1380
4429 msgid "Create the new cpuset using B<mkdir>(1)."
4433 #: build/C/man7/cpuset.7:1380 build/C/man7/cpuset.7:1424
4439 #: build/C/man7/cpuset.7:1382
4440 msgid "Assign CPUs and memory nodes to the new cpuset."
4444 #: build/C/man7/cpuset.7:1382 build/C/man7/cpuset.7:1429
4450 #: build/C/man7/cpuset.7:1384
4451 msgid "Attach the shell to the new cpuset."
4455 #: build/C/man7/cpuset.7:1389
4457 "For example, the following sequence of commands will set up a cpuset named "
4458 "\"Charlie\", containing just CPUs 2 and 3, and memory node 1, and then "
4459 "attach the current shell to that cpuset."
4463 #: build/C/man7/cpuset.7:1403
4466 "$B< mkdir /dev/cpuset>\n"
4467 "$B< mount -t cpuset cpuset /dev/cpuset>\n"
4468 "$B< cd /dev/cpuset>\n"
4469 "$B< mkdir Charlie>\n"
4471 "$B< /bin/echo 2-3 E<gt> cpuset.cpus>\n"
4472 "$B< /bin/echo 1 E<gt> cpuset.mems>\n"
4473 "$B< /bin/echo $$ E<gt> tasks>\n"
4474 "# The current shell is now running in cpuset Charlie\n"
4475 "# The next line should display '/Charlie'\n"
4476 "$B< cat /proc/self/cpuset>\n"
4480 #: build/C/man7/cpuset.7:1405
4482 msgid "Migrating a job to different memory nodes."
4486 #: build/C/man7/cpuset.7:1410
4488 "To migrate a job (the set of processes attached to a cpuset) to different "
4489 "CPUs and memory nodes in the system, including moving the memory pages "
4490 "currently allocated to that job, perform the following steps."
4494 #: build/C/man7/cpuset.7:1418
4496 "Let's say we want to move the job in cpuset I<alpha> (CPUs 4-7 and memory "
4497 "nodes 2-3) to a new cpuset I<beta> (CPUs 16-19 and memory nodes 8-9)."
4501 #: build/C/man7/cpuset.7:1421
4502 msgid "First create the new cpuset I<beta>."
4506 #: build/C/man7/cpuset.7:1424
4507 msgid "Then allow CPUs 16-19 and memory nodes 8-9 in I<beta>."
4511 #: build/C/man7/cpuset.7:1429
4512 msgid "Then enable I<cpuset.memory_migrate> in I<beta>."
4516 #: build/C/man7/cpuset.7:1434
4517 msgid "Then move each process from I<alpha> to I<beta>."
4521 #: build/C/man7/cpuset.7:1437
4522 msgid "The following sequence of commands accomplishes this."
4526 #: build/C/man7/cpuset.7:1447
4529 "$B< cd /dev/cpuset>\n"
4532 "$B< /bin/echo 16-19 E<gt> cpuset.cpus>\n"
4533 "$B< /bin/echo 8-9 E<gt> cpuset.mems>\n"
4534 "$B< /bin/echo 1 E<gt> cpuset.memory_migrate>\n"
4535 "$B< while read i; do /bin/echo $i; done E<lt> ../alpha/tasks E<gt> tasks>\n"
4539 #: build/C/man7/cpuset.7:1456
4541 "The above should move any processes in I<alpha> to I<beta>, and any memory "
4542 "held by these processes on memory nodes 2-3 to memory nodes 8-9, "
4547 #: build/C/man7/cpuset.7:1458
4548 msgid "Notice that the last step of the above sequence did not do:"
4552 #: build/C/man7/cpuset.7:1462
4554 msgid "$B< cp ../alpha/tasks tasks>\n"
4558 #: build/C/man7/cpuset.7:1473
4560 "The I<while> loop, rather than the seemingly easier use of the B<cp>(1) "
4561 "command, was necessary because only one process PID at a time may be written "
4562 "to the I<tasks> file."
4566 #: build/C/man7/cpuset.7:1481
4568 "The same effect (writing one PID at a time) as the I<while> loop can be "
4569 "accomplished more efficiently, in fewer keystrokes and in syntax that works "
4570 "on any shell, but alas more obscurely, by using the B<-u> (unbuffered) "
4571 "option of B<sed>(1):"
4575 #: build/C/man7/cpuset.7:1485
4577 msgid "$B< sed -un p E<lt> ../alpha/tasks E<gt> tasks>\n"
4581 #: build/C/man7/cpuset.7:1503
4583 "B<taskset>(1), B<get_mempolicy>(2), B<getcpu>(2), B<mbind>(2), "
4584 "B<sched_getaffinity>(2), B<sched_setaffinity>(2), B<sched_setscheduler>(2), "
4585 "B<set_mempolicy>(2), B<CPU_SET>(3), B<proc>(5), B<numa>(7), B<sched>(7), "
4586 "B<migratepages>(8), B<numactl>(8)"
4590 #: build/C/man7/cpuset.7:1505
4591 msgid "I<Documentation/cpusets.txt> in the Linux kernel source tree"
4595 #: build/C/man7/credentials.7:27
4601 #: build/C/man7/credentials.7:27 build/C/man2/setsid.2:31
4607 #: build/C/man7/credentials.7:30
4608 msgid "credentials - process identifiers"
4612 #: build/C/man7/credentials.7:31
4614 msgid "Process ID (PID)"
4618 #: build/C/man7/credentials.7:41
4620 "Each process has a unique nonnegative integer identifier that is assigned "
4621 "when the process is created using B<fork>(2). A process can obtain its PID "
4622 "using B<getpid>(2). A PID is represented using the type I<pid_t> (defined "
4623 "in I<E<lt>sys/types.hE<gt>>)."
4626 #. .BR sched_rr_get_interval (2),
4627 #. .BR sched_getaffinity (2),
4628 #. .BR sched_setaffinity (2),
4629 #. .BR sched_getparam (2),
4630 #. .BR sched_setparam (2),
4631 #. .BR sched_setscheduler (2),
4632 #. .BR sched_getscheduler (2),
4637 #: build/C/man7/credentials.7:62
4639 "PIDs are used in a range of system calls to identify the process affected by "
4640 "the call, for example: B<kill>(2), B<ptrace>(2), B<setpriority>(2), "
4641 "B<setpgid>(2), B<setsid>(2), B<sigqueue>(3), and B<waitpid>(2)."
4645 #: build/C/man7/credentials.7:65
4646 msgid "A process's PID is preserved across an B<execve>(2)."
4650 #: build/C/man7/credentials.7:65
4652 msgid "Parent process ID (PPID)"
4656 #: build/C/man7/credentials.7:73
4658 "A process's parent process ID identifies the process that created this "
4659 "process using B<fork>(2). A process can obtain its PPID using "
4660 "B<getppid>(2). A PPID is represented using the type I<pid_t>."
4664 #: build/C/man7/credentials.7:76
4665 msgid "A process's PPID is preserved across an B<execve>(2)."
4669 #: build/C/man7/credentials.7:76
4671 msgid "Process group ID and session ID"
4675 #: build/C/man7/credentials.7:84
4677 "Each process has a session ID and a process group ID, both represented using "
4678 "the type I<pid_t>. A process can obtain its session ID using B<getsid>(2), "
4679 "and its process group ID using B<getpgrp>(2)."
4683 #: build/C/man7/credentials.7:90
4685 "A child created by B<fork>(2) inherits its parent's session ID and process "
4686 "group ID. A process's session ID and process group ID are preserved across "
4691 #: build/C/man7/credentials.7:103
4693 "Sessions and process groups are abstractions devised to support shell job "
4694 "control. A process group (sometimes called a \"job\") is a collection of "
4695 "processes that share the same process group ID; the shell creates a new "
4696 "process group for the process(es) used to execute a single command or pipeline "
4697 "(e.g., the two processes created to execute the command \"ls\\ |\\ wc\" are "
4698 "placed in the same process group). A process's group membership can be set "
4699 "using B<setpgid>(2). The process whose process ID is the same as its "
4700 "process group ID is the I<process group leader> for that group."
4704 #: build/C/man7/credentials.7:115
4706 "A session is a collection of processes that share the same session ID. All "
4707 "of the members of a process group also have the same session ID (i.e., all "
4708 "of the members of a process group always belong to the same session, so that "
4709 "sessions and process groups form a strict two-level hierarchy of processes). "
4710 "A new session is created when a process calls B<setsid>(2), which creates a "
4711 "new session whose session ID is the same as the PID of the process that "
4712 "called B<setsid>(2). The creator of the session is called the I<session "
4717 #: build/C/man7/credentials.7:124
4719 "All of the processes in a session share a I<controlling terminal>. The "
4720 "controlling terminal is established when the session leader first opens a "
4721 "terminal (unless the B<O_NOCTTY> flag is specified when calling "
4722 "B<open>(2)). A terminal may be the controlling terminal of at most one "
4727 #: build/C/man7/credentials.7:146
4729 "At most one of the jobs in a session may be the I<foreground job>; other "
4730 "jobs in the session are I<background jobs>. Only the foreground job may "
4731 "read from the terminal; when a process in the background attempts to read "
4732 "from the terminal, its process group is sent a B<SIGTTIN> signal, which "
4733 "suspends the job. If the B<TOSTOP> flag has been set for the terminal (see "
4734 "B<termios>(3)), then only the foreground job may write to the terminal; "
4735 "writes from a background job cause a B<SIGTTOU> signal to be generated, which "
4736 "suspends the job. When terminal keys that generate a signal (such as the "
4737 "I<interrupt> key, normally control-C) are pressed, the signal is sent to "
4738 "the processes in the foreground job."
4742 #: build/C/man7/credentials.7:167
4744 "Various system calls and library functions may operate on all members of a "
4745 "process group, including B<kill>(2), B<killpg>(2), B<getpriority>(2), "
4746 "B<setpriority>(2), B<ioprio_get>(2), B<ioprio_set>(2), B<waitid>(2), and "
4747 "B<waitpid>(2). See also the discussion of the B<F_GETOWN>, B<F_GETOWN_EX>, "
4748 "B<F_SETOWN>, and B<F_SETOWN_EX> operations in B<fcntl>(2)."
4752 #: build/C/man7/credentials.7:167
4754 msgid "User and group identifiers"
4758 #: build/C/man7/credentials.7:175
4760 "Each process has various associated user and group IDs. These IDs are "
4761 "integers, respectively represented using the types I<uid_t> and I<gid_t> "
4762 "(defined in I<E<lt>sys/types.hE<gt>>)."
4766 #: build/C/man7/credentials.7:177
4767 msgid "On Linux, each process has the following user and group identifiers:"
4771 #: build/C/man7/credentials.7:183
4773 "Real user ID and real group ID. These IDs determine who owns the process. "
4774 "A process can obtain its real user (group) ID using B<getuid>(2) "
4779 #: build/C/man7/credentials.7:195
4781 "Effective user ID and effective group ID. These IDs are used by the kernel "
4782 "to determine the permissions that the process will have when accessing "
4783 "shared resources such as message queues, shared memory, and semaphores. On "
4784 "most UNIX systems, these IDs also determine the permissions when accessing "
4785 "files. However, Linux uses the filesystem IDs described below for this "
4786 "task. A process can obtain its effective user (group) ID using "
4787 "B<geteuid>(2) (B<getegid>(2))."
4791 #: build/C/man7/credentials.7:217
4793 "Saved set-user-ID and saved set-group-ID. These IDs are used in set-user-ID "
4794 "and set-group-ID programs to save a copy of the corresponding effective IDs "
4795 "that were set when the program was executed (see B<execve>(2)). A "
4796 "set-user-ID program can assume and drop privileges by switching its "
4797 "effective user ID back and forth between the values in its real user ID and "
4798 "saved set-user-ID. This switching is done via calls to B<seteuid>(2), "
4799 "B<setreuid>(2), or B<setresuid>(2). A set-group-ID program performs the "
4800 "analogous tasks using B<setegid>(2), B<setregid>(2), or B<setresgid>(2). A "
4801 "process can obtain its saved set-user-ID (set-group-ID) using "
4802 "B<getresuid>(2) (B<getresgid>(2))."
4806 #: build/C/man7/credentials.7:234
4808 "Filesystem user ID and filesystem group ID (Linux-specific). These IDs, in "
4809 "conjunction with the supplementary group IDs described below, are used to "
4810 "determine permissions for accessing files; see B<path_resolution>(7) for "
4811 "details. Whenever a process's effective user (group) ID is changed, the "
4812 "kernel also automatically changes the filesystem user (group) ID to the same "
4813 "value. Consequently, the filesystem IDs normally have the same values as "
4814 "the corresponding effective ID, and the semantics for file-permission checks "
4815 "are thus the same on Linux as on other UNIX systems. The filesystem IDs can "
4816 "be made to differ from the effective IDs by calling B<setfsuid>(2) and "
4820 #. Since kernel 2.6.4, the limit is visible via the read-only file
4821 #. /proc/sys/kernel/ngroups_max.
4822 #. As at 2.6.22-rc2, this file is still read-only.
4824 #: build/C/man7/credentials.7:253
4826 "Supplementary group IDs. This is a set of additional group IDs that are "
4827 "used for permission checks when accessing files and other shared resources. "
4828 "On Linux kernels before 2.6.4, a process can be a member of up to 32 "
4829 "supplementary groups; since kernel 2.6.4, a process can be a member of up to "
4830 "65536 supplementary groups. The call I<sysconf(_SC_NGROUPS_MAX)> can be "
4831 "used to determine the number of supplementary groups of which a process may "
4832 "be a member. A process can obtain its set of supplementary group IDs using "
4833 "B<getgroups>(2), and can modify the set using B<setgroups>(2)."
4837 #: build/C/man7/credentials.7:263
4839 "A child process created by B<fork>(2) inherits copies of its parent's user "
4840 "and group IDs. During an B<execve>(2), a process's real user and group ID "
4841 "and supplementary group IDs are preserved; the effective and saved set IDs "
4842 "may be changed, as described in B<execve>(2)."
4846 #: build/C/man7/credentials.7:266
4848 "Aside from the purposes noted above, a process's user IDs are also employed "
4849 "in a number of other contexts:"
4853 #: build/C/man7/credentials.7:269
4854 msgid "when determining the permissions for sending signals (see B<kill>(2));"
4858 #: build/C/man7/credentials.7:280
4860 "when determining the permissions for setting process-scheduling parameters "
4861 "(nice value, real-time scheduling policy and priority, CPU affinity, I/O "
4862 "priority) using B<setpriority>(2), B<sched_setaffinity>(2), "
4863 "B<sched_setscheduler>(2), B<sched_setparam>(2), B<sched_setattr>(2), and "
4868 #: build/C/man7/credentials.7:283
4869 msgid "when checking resource limits (see B<getrlimit>(2));"
4873 #: build/C/man7/credentials.7:287
4875 "when checking the limit on the number of inotify instances that the process "
4876 "may create (see B<inotify>(7))."
4880 #: build/C/man7/credentials.7:293
4882 "Process IDs, parent process IDs, process group IDs, and session IDs are "
4883 "specified in POSIX.1-2001. The real, effective, and saved set user and "
4884 "group IDs, and the supplementary group IDs, are specified in POSIX.1-2001. "
4885 "The filesystem user and group IDs are a Linux extension."
4889 #: build/C/man7/credentials.7:304
4891 "The POSIX threads specification requires that credentials are shared by all "
4892 "of the threads in a process. However, at the kernel level, Linux maintains "
4893 "separate user and group credentials for each thread. The NPTL threading "
4894 "implementation does some work to ensure that any change to user or group "
4895 "credentials (e.g., calls to B<setuid>(2), B<setresuid>(2)) is carried "
4896 "through to all of the POSIX threads in a process."
4900 #: build/C/man7/credentials.7:339
4902 "B<bash>(1), B<csh>(1), B<ps>(1), B<access>(2), B<execve>(2), "
4903 "B<faccessat>(2), B<fork>(2), B<getgroups>(2), B<getpgrp>(2), B<getpid>(2), "
4904 "B<getppid>(2), B<getsid>(2), B<kill>(2), B<killpg>(2), B<setegid>(2), "
4905 "B<seteuid>(2), B<setfsgid>(2), B<setfsuid>(2), B<setgid>(2), "
4906 "B<setgroups>(2), B<setresgid>(2), B<setresuid>(2), B<setuid>(2), "
4907 "B<waitpid>(2), B<euidaccess>(3), B<initgroups>(3), B<tcgetpgrp>(3), "
4908 "B<tcsetpgrp>(3), B<capabilities>(7), B<namespaces>(7), "
4909 "B<path_resolution>(7), B<pid_namespaces>(7), B<signal>(7), "
4910 "B<user_namespaces>(7), B<unix>(7)"
4914 #: build/C/man2/getgid.2:25
4920 #: build/C/man2/getgid.2:25 build/C/man2/getresuid.2:28 build/C/man2/getuid.2:26
4926 #: build/C/man2/getgid.2:28
4927 msgid "getgid, getegid - get group identity"
4931 #: build/C/man2/getgid.2:30 build/C/man2/getgroups.2:38 build/C/man2/getpid.2:32 build/C/man2/getresuid.2:35 build/C/man2/getsid.2:31 build/C/man2/getuid.2:31 build/C/man3/group_member.3:30 build/C/man2/seteuid.2:36 build/C/man2/setgid.2:36 build/C/man2/setpgid.2:53 build/C/man2/setresuid.2:33 build/C/man2/setreuid.2:52 build/C/man2/setsid.2:37 build/C/man2/setuid.2:37
4932 msgid "B<#include E<lt>unistd.hE<gt>>"
4936 #: build/C/man2/getgid.2:32 build/C/man2/getgroups.2:36 build/C/man2/getpid.2:30 build/C/man2/getuid.2:33 build/C/man2/seteuid.2:34 build/C/man2/setgid.2:34 build/C/man2/setreuid.2:50 build/C/man2/setuid.2:35
4937 msgid "B<#include E<lt>sys/types.hE<gt>>"
4941 #: build/C/man2/getgid.2:34
4942 msgid "B<gid_t getgid(void);>"
4946 #: build/C/man2/getgid.2:36
4947 msgid "B<gid_t getegid(void);>"
4951 #: build/C/man2/getgid.2:39
4952 msgid "B<getgid>() returns the real group ID of the calling process."
4956 #: build/C/man2/getgid.2:42
4957 msgid "B<getegid>() returns the effective group ID of the calling process."
4961 #: build/C/man2/getgid.2:44 build/C/man2/getpid.2:46 build/C/man2/getuid.2:45
4962 msgid "These functions are always successful."
4966 #: build/C/man2/getgid.2:46 build/C/man2/getuid.2:47
4967 msgid "POSIX.1-2001, 4.3BSD."
4971 #: build/C/man2/getgid.2:62
4973 "The original Linux B<getgid>() and B<getegid>() system calls supported "
4974 "only 16-bit group IDs. Subsequently, Linux 2.4 added B<getgid32>() and "
4975 "B<getegid32>(), supporting 32-bit IDs. The glibc B<getgid>() and "
4976 "B<getegid>() wrapper functions transparently deal with the variations "
4977 "across kernel versions."
4981 #: build/C/man2/getgid.2:66
4982 msgid "B<getresgid>(2), B<setgid>(2), B<setregid>(2), B<credentials>(7)"
4986 #: build/C/man2/getgroups.2:31
4992 #: build/C/man2/getgroups.2:31 build/C/man2/getpriority.2:45
4998 #: build/C/man2/getgroups.2:34
4999 msgid "getgroups, setgroups - get/set list of supplementary group IDs"
5003 #: build/C/man2/getgroups.2:40
5004 msgid "B<int getgroups(int >I<size>B<, gid_t >I<list>B<[]);>"
5008 #: build/C/man2/getgroups.2:42
5009 msgid "B<#include E<lt>grp.hE<gt>>"
5013 #: build/C/man2/getgroups.2:44
5014 msgid "B<int setgroups(size_t >I<size>B<, const gid_t *>I<list>B<);>"
5018 #: build/C/man2/getgroups.2:52
5019 msgid "B<setgroups>(): _BSD_SOURCE"
5023 #: build/C/man2/getgroups.2:70
5025 "B<getgroups>() returns the supplementary group IDs of the calling process "
5026 "in I<list>. The argument I<size> should be set to the maximum number of "
5027 "items that can be stored in the buffer pointed to by I<list>. If the "
5028 "calling process is a member of more than I<size> supplementary groups, then "
5029 "an error results. It is unspecified whether the effective group ID of the "
5030 "calling process is included in the returned list. (Thus, an application "
5031 "should also call B<getegid>(2) and add or remove the resulting value.)"
5035 #: build/C/man2/getgroups.2:81
5037 "If I<size> is zero, I<list> is not modified, but the total number of "
5038 "supplementary group IDs for the process is returned. This allows the caller "
5039 "to determine the size of a dynamically allocated I<list> to be used in a "
5040 "further call to B<getgroups>()."
5044 #: build/C/man2/getgroups.2:92
5046 "B<setgroups>() sets the supplementary group IDs for the calling process. "
5047 "Appropriate privileges (Linux: the B<CAP_SETGID> capability) are required. "
5048 "The I<size> argument specifies the number of supplementary group IDs in the "
5049 "buffer pointed to by I<list>."
5053 #: build/C/man2/getgroups.2:99
5055 "On success, B<getgroups>() returns the number of supplementary group IDs. "
5056 "On error, -1 is returned, and I<errno> is set appropriately."
5060 #: build/C/man2/getgroups.2:106
5062 "On success, B<setgroups>() returns 0. On error, -1 is returned, and "
5063 "I<errno> is set appropriately."
5067 #: build/C/man2/getgroups.2:111
5068 msgid "I<list> has an invalid address."
5072 #: build/C/man2/getgroups.2:114
5073 msgid "B<getgroups>() can additionally fail with the following error:"
5077 #: build/C/man2/getgroups.2:118
5078 msgid "I<size> is less than the number of supplementary group IDs, but is not zero."
5082 #: build/C/man2/getgroups.2:121
5083 msgid "B<setgroups>() can additionally fail with the following errors:"
5087 #: build/C/man2/getgroups.2:127
5089 "I<size> is greater than B<NGROUPS_MAX> (32 before Linux 2.6.4; 65536 since "
5094 #: build/C/man2/getgroups.2:133
5095 msgid "The calling process has insufficient privilege."
5099 #: build/C/man2/getgroups.2:141
5101 "SVr4, 4.3BSD. The B<getgroups>() function is in POSIX.1-2001. Since "
5102 "B<setgroups>() requires privilege, it is not covered by POSIX.1-2001."
5106 #: build/C/man2/getgroups.2:153
5108 "A process can have up to B<NGROUPS_MAX> supplementary group IDs in addition "
5109 "to the effective group ID. The constant B<NGROUPS_MAX> is defined in "
5110 "I<E<lt>limits.hE<gt>>. The set of supplementary group IDs is inherited from "
5111 "the parent process, and preserved across an B<execve>(2)."
5115 #: build/C/man2/getgroups.2:156
5117 "The maximum number of supplementary group IDs can be found at run time using "
5122 #: build/C/man2/getgroups.2:160
5125 " long ngroups_max;\n"
5126 " ngroups_max = sysconf(_SC_NGROUPS_MAX);\n"
5130 #: build/C/man2/getgroups.2:168
5132 "The maximum return value of B<getgroups>() cannot be larger than one more "
5133 "than this value. Since Linux 2.6.4, the maximum number of supplementary "
5134 "group IDs is also exposed via the Linux-specific read-only file, "
5135 "I</proc/sys/kernel/ngroups_max>."
5139 #: build/C/man2/getgroups.2:178
5141 "The original Linux B<getgroups>() system call supported only 16-bit group "
5142 "IDs. Subsequently, Linux 2.4 added B<getgroups32>(), supporting 32-bit "
5143 "IDs. The glibc B<getgroups>() wrapper function transparently deals with "
5144 "the variation across kernel versions."
5148 #: build/C/man2/getgroups.2:185
5150 "B<getgid>(2), B<setgid>(2), B<getgrouplist>(3), B<group_member>(3), "
5151 "B<initgroups>(3), B<capabilities>(7), B<credentials>(7)"
5155 #: build/C/man2/getpid.2:25
5161 #: build/C/man2/getpid.2:28
5162 msgid "getpid, getppid - get process identification"
5166 #: build/C/man2/getpid.2:34
5167 msgid "B<pid_t getpid(void);>"
5171 #: build/C/man2/getpid.2:36
5172 msgid "B<pid_t getppid(void);>"
5176 #: build/C/man2/getpid.2:41
5178 "B<getpid>() returns the process ID of the calling process. (This is often "
5179 "used by routines that generate unique temporary filenames.)"
5183 #: build/C/man2/getpid.2:44
5184 msgid "B<getppid>() returns the process ID of the parent of the calling process."
5188 #: build/C/man2/getpid.2:48
5189 msgid "POSIX.1-2001, 4.3BSD, SVr4."
5192 #. The following program demonstrates this "feature":
5194 #. #define _GNU_SOURCE
5195 #. #include <sys/syscall.h>
5196 #. #include <sys/wait.h>
5197 #. #include <stdio.h>
5198 #. #include <stdlib.h>
5199 #. #include <unistd.h>
5202 #. main(int argc, char *argv[])
5204 #. /* The following statement fills the getpid() cache */
5206 #. printf("parent PID = %ld
5207 #. ", (long) getpid());
5209 #. if (syscall(SYS_fork) == 0) {
5210 #. if (getpid() != syscall(SYS_getpid))
5211 #. printf("child getpid() mismatch: getpid()=%ld; "
5212 #. "syscall(SYS_getpid)=%ld
5214 #. (long) getpid(), (long) syscall(SYS_getpid));
5215 #. exit(EXIT_SUCCESS);
5220 #: build/C/man2/getpid.2:100
5222 "Since glibc version 2.3.4, the glibc wrapper function for B<getpid>() "
5223 "caches PIDs, so as to avoid additional system calls when a process calls "
5224 "B<getpid>() repeatedly. Normally this caching is invisible, but its "
5225 "correct operation relies on support in the wrapper functions for B<fork>(2), "
5226 "B<vfork>(2), and B<clone>(2): if an application bypasses the glibc wrappers "
5227 "for these system calls by using B<syscall>(2), then a call to B<getpid>() "
5228 "in the child will return the wrong value (to be precise: it will return the "
5229 "PID of the parent process). See also B<clone>(2) for discussion of a case "
5230 "where B<getpid>() may return the wrong value even when invoking B<clone>(2) "
5231 "via the glibc wrapper function."
5235 #: build/C/man2/getpid.2:110
5237 "B<clone>(2), B<fork>(2), B<kill>(2), B<exec>(3), B<mkstemp>(3), "
5238 "B<tempnam>(3), B<tmpfile>(3), B<tmpnam>(3), B<credentials>(7), "
5239 "B<pid_namespaces>(7)"
5243 #: build/C/man2/getpriority.2:45
5249 #: build/C/man2/getpriority.2:48
5250 msgid "getpriority, setpriority - get/set program scheduling priority"
5254 #: build/C/man2/getpriority.2:50 build/C/man2/getrlimit.2:69 build/C/man2/getrusage.2:44
5255 msgid "B<#include E<lt>sys/time.hE<gt>>"
5259 #: build/C/man2/getpriority.2:52 build/C/man2/getrlimit.2:71 build/C/man2/getrusage.2:46
5260 msgid "B<#include E<lt>sys/resource.hE<gt>>"
5264 #: build/C/man2/getpriority.2:54
5265 msgid "B<int getpriority(int >I<which>B<, id_t >I<who>B<);>"
5269 #: build/C/man2/getpriority.2:56
5270 msgid "B<int setpriority(int >I<which>B<, id_t >I<who>B<, int >I<prio>B<);>"
5274 #: build/C/man2/getpriority.2:67
5276 "The scheduling priority of the process, process group, or user, as indicated "
5277 "by I<which> and I<who>, is obtained with the B<getpriority>() call and set "
5278 "with the B<setpriority>() call."
5282 #: build/C/man2/getpriority.2:94
5284 "The value I<which> is one of B<PRIO_PROCESS>, B<PRIO_PGRP>, or B<PRIO_USER>, "
5285 "and I<who> is interpreted relative to I<which> (a process identifier for "
5286 "B<PRIO_PROCESS>, process group identifier for B<PRIO_PGRP>, and a user ID "
5287 "for B<PRIO_USER>). A zero value for I<who> denotes (respectively) the "
5288 "calling process, the process group of the calling process, or the real user "
5289 "ID of the calling process. I<Prio> is a value in the range -20 to 19 (but "
5290 "see the Notes below). The default priority is 0; lower priorities cause "
5291 "more favorable scheduling."
5295 #: build/C/man2/getpriority.2:104
5297 "The B<getpriority>() call returns the highest priority (lowest numerical "
5298 "value) enjoyed by any of the specified processes. The B<setpriority>() "
5299 "call sets the priorities of all of the specified processes to the specified "
5300 "value. Only the superuser may lower priorities."
5304 #: build/C/man2/getpriority.2:117
5306 "Since B<getpriority>() can legitimately return the value -1, it is "
5307 "necessary to clear the external variable I<errno> prior to the call, then "
5308 "check it afterward to determine if -1 is an error or a legitimate value. "
5309 "The B<setpriority>() call returns 0 if there is no error, or -1 if there "
5314 #: build/C/man2/getpriority.2:126
5315 msgid "I<which> was not one of B<PRIO_PROCESS>, B<PRIO_PGRP>, or B<PRIO_USER>."
5319 #: build/C/man2/getpriority.2:133
5320 msgid "No process was located using the I<which> and I<who> values specified."
5324 #: build/C/man2/getpriority.2:137
5325 msgid "In addition to the errors indicated above, B<setpriority>() may fail if:"
5329 #: build/C/man2/getpriority.2:149
5331 "The caller attempted to lower a process priority, but did not have the "
5332 "required privilege (on Linux: did not have the B<CAP_SYS_NICE> capability). "
5333 "Since Linux 2.6.12, this error occurs only if the caller attempts to set a "
5334 "process priority outside the range of the B<RLIMIT_NICE> soft resource limit "
5335 "of the target process; see B<getrlimit>(2) for details."
5339 #: build/C/man2/getpriority.2:157
5341 "A process was located, but its effective user ID did not match either the "
5342 "effective or the real user ID of the caller, and the caller was not privileged (on "
5343 "Linux: did not have the B<CAP_SYS_NICE> capability). But see NOTES below."
5347 #: build/C/man2/getpriority.2:160
5348 msgid "SVr4, 4.4BSD (these function calls first appeared in 4.2BSD), POSIX.1-2001."
5352 #: build/C/man2/getpriority.2:166
5354 "A child created by B<fork>(2) inherits its parent's nice value. The nice "
5355 "value is preserved across B<execve>(2)."
5359 #: build/C/man2/getpriority.2:177
5361 "The degree to which their relative nice value affects the scheduling of "
5362 "processes varies across UNIX systems, and, on Linux, across kernel "
5363 "versions. Starting with kernel 2.6.23, Linux adopted an algorithm that "
5364 "causes relative differences in nice values to have a much stronger effect. "
5365 "This causes very low nice values (+19) to truly provide little CPU to a "
5366 "process whenever there is any other higher priority load on the system, and "
5367 "makes high nice values (-20) deliver most of the CPU to applications that "
5368 "require it (e.g., some audio applications)."
5372 #: build/C/man2/getpriority.2:192
5374 "The details on the condition for B<EPERM> depend on the system. The above "
5375 "description is what POSIX.1-2001 says, and seems to be followed on all "
5376 "System\\ V-like systems. Linux kernels before 2.6.12 required the real or "
5377 "effective user ID of the caller to match the real user of the process I<who> "
5378 "(instead of its effective user ID). Linux 2.6.12 and later require the "
5379 "effective user ID of the caller to match the real or effective user ID of "
5380 "the process I<who>. All BSD-like systems (SunOS 4.1.3, Ultrix 4.2, 4.3BSD, "
5381 "FreeBSD 4.3, OpenBSD-2.5, ...) behave in the same manner as Linux 2.6.12 and "
5386 #: build/C/man2/getpriority.2:197
5388 "The actual priority range varies between kernel versions. Linux before "
5389 "1.3.36 had -infinity..15. Since kernel 1.3.43, Linux has the range "
5390 "-20..19. On some other systems, the range of nice values is -20..20."
5394 #: build/C/man2/getpriority.2:210
5396 "Including I<E<lt>sys/time.hE<gt>> is not required these days, but increases "
5397 "portability. (Indeed, I<E<lt>sys/resource.hE<gt>> defines the I<rusage> "
5398 "structure with fields of type I<struct timeval> defined in "
5399 "I<E<lt>sys/time.hE<gt>>.)"
5403 #: build/C/man2/getpriority.2:210 build/C/man2/seteuid.2:132
5405 msgid "C library/kernel ABI differences"
5409 #: build/C/man2/getpriority.2:225
5411 "Within the kernel, nice values are actually represented using the range "
5412 "40..1 (since negative numbers are error codes) and these are the values "
5413 "employed by the B<setpriority>() and B<getpriority>() system calls. The "
5414 "glibc wrapper functions for these system calls handle the translations "
5415 "between the user-land and kernel representations of the nice value according "
5416 "to the formula I<unice\\ =\\ 20\\ -\\ knice>. (Thus, the kernel's 40..1 "
5417 "range corresponds to the range -20..19 as seen by user space.)"
5421 #: build/C/man2/getpriority.2:232
5423 "According to POSIX, the nice value is a per-process setting. However, under "
5424 "the current Linux/NPTL implementation of POSIX threads, the nice value is a "
5425 "per-thread attribute: different threads in the same process can have "
5426 "different nice values. Portable applications should avoid relying on the "
5427 "Linux behavior, which may be made standards conformant in the future."
5431 #: build/C/man2/getpriority.2:238
5432 msgid "B<nice>(1), B<renice>(1), B<fork>(2), B<capabilities>(7), B<sched>(7)"
5436 #: build/C/man2/getpriority.2:240
5438 "I<Documentation/scheduler/sched-nice-design.txt> in the Linux kernel source "
5439 "tree (since Linux 2.6.23)"
5443 #: build/C/man2/getresuid.2:28
5449 #: build/C/man2/getresuid.2:31
5450 msgid "getresuid, getresgid - get real, effective and saved user/group IDs"
5454 #: build/C/man2/getresuid.2:33 build/C/man2/setresuid.2:31
5455 msgid "B<#define _GNU_SOURCE> /* See feature_test_macros(7) */"
5459 #: build/C/man2/getresuid.2:37
5460 msgid "B<int getresuid(uid_t *>I<ruid>B<, uid_t *>I<euid>B<, uid_t *>I<suid>B<);>"
5464 #: build/C/man2/getresuid.2:39
5465 msgid "B<int getresgid(gid_t *>I<rgid>B<, gid_t *>I<egid>B<, gid_t *>I<sgid>B<);>"
5469 #: build/C/man2/getresuid.2:50
5471 "B<getresuid>() returns the real UID, the effective UID, and the saved "
5472 "set-user-ID of the calling process, in the arguments I<ruid>, I<euid>, and "
5473 "I<suid>, respectively. B<getresgid>() performs the analogous task for the "
5474 "process's group IDs."
5478 #: build/C/man2/getresuid.2:60
5480 "One of the arguments specified an address outside the calling program's "
5485 #: build/C/man2/getresuid.2:62
5486 msgid "These system calls appeared on Linux starting with kernel 2.1.44."
5490 #: build/C/man2/getresuid.2:67
5492 "The prototypes are given by glibc since version 2.3.2, provided "
5493 "B<_GNU_SOURCE> is defined."
5497 #: build/C/man2/getresuid.2:70 build/C/man2/setresuid.2:112
5498 msgid "These calls are nonstandard; they also appear on HP-UX and some of the BSDs."
5502 #: build/C/man2/getresuid.2:86
5504 "The original Linux B<getresuid>() and B<getresgid>() system calls "
5505 "supported only 16-bit user and group IDs. Subsequently, Linux 2.4 added "
5506 "B<getresuid32>() and B<getresgid32>(), supporting 32-bit IDs. The glibc "
5507 "B<getresuid>() and B<getresgid>() wrapper functions transparently deal "
5508 "with the variations across kernel versions."
5512 #: build/C/man2/getresuid.2:91
5514 "B<getuid>(2), B<setresuid>(2), B<setreuid>(2), B<setuid>(2), "
5519 #: build/C/man2/getrlimit.2:64
5525 #: build/C/man2/getrlimit.2:64
5531 #: build/C/man2/getrlimit.2:67
5532 msgid "getrlimit, setrlimit, prlimit - get/set resource limits"
5536 #: build/C/man2/getrlimit.2:73
5537 msgid "B<int getrlimit(int >I<resource>B<, struct rlimit *>I<rlim>B<);>"
5541 #: build/C/man2/getrlimit.2:75
5542 msgid "B<int setrlimit(int >I<resource>B<, const struct rlimit *>I<rlim>B<);>"
5546 #: build/C/man2/getrlimit.2:78
5548 "B<int prlimit(pid_t >I<pid>B<, int >I<resource>B<, const struct rlimit "
5549 "*>I<new_limit>B<,>"
5553 #: build/C/man2/getrlimit.2:80
5554 msgid "B< struct rlimit *>I<old_limit>B<);>"
5558 #: build/C/man2/getrlimit.2:88
5559 msgid "B<prlimit>(): _GNU_SOURCE && _FILE_OFFSET_BITS == 64"
5563 #: build/C/man2/getrlimit.2:97
5565 "The B<getrlimit>() and B<setrlimit>() system calls get and set resource "
5566 "limits respectively. Each resource has an associated soft and hard limit, "
5567 "as defined by the I<rlimit> structure:"
5571 #: build/C/man2/getrlimit.2:104
5575 " rlim_t rlim_cur; /* Soft limit */\n"
5576 " rlim_t rlim_max; /* Hard limit (ceiling for rlim_cur) */\n"
5581 #: build/C/man2/getrlimit.2:115
5583 "The soft limit is the value that the kernel enforces for the corresponding "
5584 "resource. The hard limit acts as a ceiling for the soft limit: an "
5585 "unprivileged process may set only its soft limit to a value in the range "
5586 "from 0 up to the hard limit, and (irreversibly) lower its hard limit. A "
5587 "privileged process (under Linux: one with the B<CAP_SYS_RESOURCE> "
5588 "capability) may make arbitrary changes to either limit value."
5592 #: build/C/man2/getrlimit.2:122
5594 "The value B<RLIM_INFINITY> denotes no limit on a resource (both in the "
5595 "structure returned by B<getrlimit>() and in the structure passed to "
5600 #: build/C/man2/getrlimit.2:126
5601 msgid "The I<resource> argument must be one of:"
5605 #: build/C/man2/getrlimit.2:126
5607 msgid "B<RLIMIT_AS>"
5610 #. since 2.0.27 / 2.1.12
5612 #: build/C/man2/getrlimit.2:146
5614 "The maximum size of the process's virtual memory (address space) in bytes. "
5615 "This limit affects calls to B<brk>(2), B<mmap>(2), and B<mremap>(2), which "
5616 "fail with the error B<ENOMEM> upon exceeding this limit. Also automatic "
5617 "stack expansion will fail (and generate a B<SIGSEGV> that kills the process "
5618 "if no alternate stack has been made available via B<sigaltstack>(2)). Since "
5619 "the value is a I<long>, on machines with a 32-bit I<long> either this limit "
5620 "is at most 2 GiB, or this resource is unlimited."
5624 #: build/C/man2/getrlimit.2:146
5626 msgid "B<RLIMIT_CORE>"
5630 #: build/C/man2/getrlimit.2:154
5632 "Maximum size of a I<core> file (see B<core>(5)). When 0, no core dump files "
5633 "are created. When nonzero, larger dumps are truncated to this size."
5637 #: build/C/man2/getrlimit.2:154
5639 msgid "B<RLIMIT_CPU>"
5643 #: build/C/man2/getrlimit.2:174
5645 "CPU time limit in seconds. When the process reaches the soft limit, it is "
5646 "sent a B<SIGXCPU> signal. The default action for this signal is to "
5647 "terminate the process. However, the signal can be caught, and the handler "
5648 "can return control to the main program. If the process continues to consume "
5649 "CPU time, it will be sent B<SIGXCPU> once per second until the hard limit is "
5650 "reached, at which time it is sent B<SIGKILL>. (This latter point describes "
5651 "Linux behavior. Implementations vary in how they treat processes which "
5652 "continue to consume CPU time after reaching the soft limit. Portable "
5653 "applications that need to catch this signal should perform an orderly "
5654 "termination upon first receipt of B<SIGXCPU>.)"
5658 #: build/C/man2/getrlimit.2:174
5660 msgid "B<RLIMIT_DATA>"
5664 #: build/C/man2/getrlimit.2:185
5666 "The maximum size of the process's data segment (initialized data, "
5667 "uninitialized data, and heap). This limit affects calls to B<brk>(2) and "
5668 "B<sbrk>(2), which fail with the error B<ENOMEM> upon encountering the soft "
5669 "limit of this resource."
5673 #: build/C/man2/getrlimit.2:185
5675 msgid "B<RLIMIT_FSIZE>"
5679 #: build/C/man2/getrlimit.2:197
5681 "The maximum size of files that the process may create. Attempts to extend a "
5682 "file beyond this limit result in delivery of a B<SIGXFSZ> signal. By "
5683 "default, this signal terminates a process, but a process can catch this "
5684 "signal instead, in which case the relevant system call (e.g., B<write>(2), "
5685 "B<truncate>(2)) fails with the error B<EFBIG>."
5689 #: build/C/man2/getrlimit.2:197
5691 msgid "B<RLIMIT_LOCKS> (Early Linux 2.4 only)"
5694 #. to be precise: Linux 2.4.0-test9; no longer in 2.4.25 / 2.5.65
5696 #: build/C/man2/getrlimit.2:205
5698 "A limit on the combined number of B<flock>(2) locks and B<fcntl>(2) leases "
5699 "that this process may establish."
5703 #: build/C/man2/getrlimit.2:205
5705 msgid "B<RLIMIT_MEMLOCK>"
5709 #: build/C/man2/getrlimit.2:243
5711 "The maximum number of bytes of memory that may be locked into RAM. In "
5712 "effect this limit is rounded down to the nearest multiple of the system page "
5713 "size. This limit affects B<mlock>(2) and B<mlockall>(2) and the "
5714 "B<mmap>(2) B<MAP_LOCKED> operation. Since Linux 2.6.9 it also affects the "
5715 "B<shmctl>(2) B<SHM_LOCK> operation, where it sets a maximum on the total "
5716 "bytes in shared memory segments (see B<shmget>(2)) that may be locked by "
5717 "the real user ID of the calling process. The B<shmctl>(2) B<SHM_LOCK> "
5718 "locks are accounted for separately from the per-process memory locks "
5719 "established by B<mlock>(2), B<mlockall>(2), and B<mmap>(2) B<MAP_LOCKED>; a "
5720 "process can lock bytes up to this limit in each of these two categories. In "
5721 "Linux kernels before 2.6.9, this limit controlled the amount of memory that "
5722 "could be locked by a privileged process. Since Linux 2.6.9, no limits are "
5723 "placed on the amount of memory that a privileged process may lock, and this "
5724 "limit instead governs the amount of memory that an unprivileged process may "
5729 #: build/C/man2/getrlimit.2:243
5731 msgid "B<RLIMIT_MSGQUEUE> (since Linux 2.6.8)"
5735 #: build/C/man2/getrlimit.2:251
5737 "Specifies the limit on the number of bytes that can be allocated for POSIX "
5738 "message queues for the real user ID of the calling process. This limit is "
5739 "enforced for B<mq_open>(3). Each message queue that the user creates counts "
5740 "(until it is removed) against this limit according to the formula:"
5744 #: build/C/man2/getrlimit.2:260
5747 " Since Linux 3.5:\n"
5748 " bytes = attr.mq_maxmsg * sizeof(struct msg_msg) +\n"
5749 " min(attr.mq_maxmsg, MQ_PRIO_MAX) *\n"
5750 " sizeof(struct posix_msg_tree_node)+\n"
5751 " /* For overhead */\n"
5752 " attr.mq_maxmsg * attr.mq_msgsize;\n"
5753 " /* For message data */\n"
5757 #: build/C/man2/getrlimit.2:266
5760 " Linux 3.4 and earlier:\n"
5761 " bytes = attr.mq_maxmsg * sizeof(struct msg_msg *) +\n"
5762 " /* For overhead */\n"
5763 " attr.mq_maxmsg * attr.mq_msgsize;\n"
5764 " /* For message data */\n"
5768 #: build/C/man2/getrlimit.2:279
5770 "where I<attr> is the I<mq_attr> structure specified as the fourth argument "
5771 "to B<mq_open>(3), and the I<msg_msg> and I<posix_msg_tree_node> structures "
5772 "are kernel-internal structures."
5776 #: build/C/man2/getrlimit.2:285
5778 "The \"overhead\" addend in the formula accounts for overhead bytes required "
5779 "by the implementation and ensures that the user cannot create an unlimited "
5780 "number of zero-length messages (such messages nevertheless each consume some "
5781 "system memory for bookkeeping overhead)."
5785 #: build/C/man2/getrlimit.2:285
5787 msgid "B<RLIMIT_NICE> (since Linux 2.6.12, but see BUGS below)"
5791 #: build/C/man2/getrlimit.2:298
5793 "Specifies a ceiling to which the process's nice value can be raised using "
5794 "B<setpriority>(2) or B<nice>(2). The actual ceiling for the nice value is "
5795 "calculated as I<20\\ -\\ rlim_cur>. (This strangeness occurs because "
5796 "negative numbers cannot be specified as resource limit values, since they "
5797 "typically have special meanings. For example, B<RLIM_INFINITY> typically is "
5802 #: build/C/man2/getrlimit.2:298
5804 msgid "B<RLIMIT_NOFILE>"
5808 #: build/C/man2/getrlimit.2:312
5810 "Specifies a value one greater than the maximum file descriptor number that "
5811 "can be opened by this process. Attempts (B<open>(2), B<pipe>(2), B<dup>(2), "
5812 "etc.) to exceed this limit yield the error B<EMFILE>. (Historically, this "
5813 "limit was named B<RLIMIT_OFILE> on BSD.)"
5817 #: build/C/man2/getrlimit.2:312
5819 msgid "B<RLIMIT_NPROC>"
5823 #: build/C/man2/getrlimit.2:325
5825 "The maximum number of processes (or, more precisely on Linux, threads) that "
5826 "can be created for the real user ID of the calling process. Upon "
5827 "encountering this limit, B<fork>(2) fails with the error B<EAGAIN>. This "
5828 "limit is not enforced for processes that have either the B<CAP_SYS_ADMIN> or "
5829 "the B<CAP_SYS_RESOURCE> capability."
5833 #: build/C/man2/getrlimit.2:325
5835 msgid "B<RLIMIT_RSS>"
5838 #. As at kernel 2.6.12, this limit still does nothing in 2.6 though
5839 #. talk of making it do something has surfaced from time to time in LKML
5842 #: build/C/man2/getrlimit.2:337
5844 "Specifies the limit (in pages) of the process's resident set (the number of "
5845 "virtual pages resident in RAM). This limit has effect only in Linux 2.4.x, "
5846 "x E<lt> 30, and there affects only calls to B<madvise>(2) specifying "
5851 #: build/C/man2/getrlimit.2:337
5853 msgid "B<RLIMIT_RTPRIO> (since Linux 2.6.12, but see BUGS)"
5857 #: build/C/man2/getrlimit.2:344
5859 "Specifies a ceiling on the real-time priority that may be set for this "
5860 "process using B<sched_setscheduler>(2) and B<sched_setparam>(2)."
5864 #: build/C/man2/getrlimit.2:344
5866 msgid "B<RLIMIT_RTTIME> (since Linux 2.6.25)"
5870 #: build/C/man2/getrlimit.2:356
5872 "Specifies a limit (in microseconds) on the amount of CPU time that a "
5873 "process scheduled under a real-time scheduling policy may consume without "
5874 "making a blocking system call. For the purpose of this limit, each time a "
5875 "process makes a blocking system call, the count of its consumed CPU time is "
5876 "reset to zero. The CPU time count is not reset if the process continues "
5877 "trying to use the CPU but is preempted, its time slice expires, or it calls "
5878 "B<sched_yield>(2)."
5882 #: build/C/man2/getrlimit.2:367
5884 "Upon reaching the soft limit, the process is sent a B<SIGXCPU> signal. If "
5885 "the process catches or ignores this signal and continues consuming CPU time, "
5886 "then B<SIGXCPU> will be generated once each second until the hard limit is "
5887 "reached, at which point the process is sent a B<SIGKILL> signal."
5891 #: build/C/man2/getrlimit.2:370
5893 "The intended use of this limit is to stop a runaway real-time process from "
5894 "locking up the system."
5898 #: build/C/man2/getrlimit.2:370
5900 msgid "B<RLIMIT_SIGPENDING> (since Linux 2.6.8)"
5903 #. This replaces the /proc/sys/kernel/rtsig-max system-wide limit
5904 #. that was present in kernels <= 2.6.7. MTK Dec 04
5906 #: build/C/man2/getrlimit.2:384
5908 "Specifies the limit on the number of signals that may be queued for the real "
5909 "user ID of the calling process. Both standard and real-time signals are "
5910 "counted for the purpose of checking this limit. However, the limit is "
5911 "enforced only for B<sigqueue>(3); it is always possible to use B<kill>(2) "
5912 "to queue one instance of any of the signals that are not already queued to "
5917 #: build/C/man2/getrlimit.2:384
5919 msgid "B<RLIMIT_STACK>"
5923 #: build/C/man2/getrlimit.2:392
5925 "The maximum size of the process stack, in bytes. Upon reaching this limit, "
5926 "a B<SIGSEGV> signal is generated. To handle this signal, a process must "
5927 "employ an alternate signal stack (B<sigaltstack>(2))."
5931 #: build/C/man2/getrlimit.2:397
5933 "Since Linux 2.6.23, this limit also determines the amount of space used for "
5934 "the process's command-line arguments and environment variables; for details, "
5939 #: build/C/man2/getrlimit.2:397
5944 #. commit c022a0acad534fd5f5d5f17280f6d4d135e74e81
5945 #. Author: Jiri Slaby <jslaby@suse.cz>
5946 #. Date: Tue May 4 18:03:50 2010 +0200
5948 #. rlimits: implement prlimit64 syscall
5950 #. commit 6a1d5e2c85d06da35cdfd93f1a27675bfdc3ad8c
5951 #. Author: Jiri Slaby <jslaby@suse.cz>
5952 #. Date: Wed Mar 24 17:06:58 2010 +0100
5954 #. rlimits: add rlimit64 structure
5956 #: build/C/man2/getrlimit.2:417
5958 "The Linux-specific B<prlimit>() system call combines and extends the "
5959 "functionality of B<setrlimit>() and B<getrlimit>(). It can be used to both "
5960 "set and get the resource limits of an arbitrary process."
5964 #: build/C/man2/getrlimit.2:424
5966 "The I<resource> argument has the same meaning as for B<setrlimit>() and "
5971 #: build/C/man2/getrlimit.2:442
5973 "If the I<new_limit> argument is not NULL, then the I<rlimit> structure to "
5974 "which it points is used to set new values for the soft and hard limits for "
5975 "I<resource>. If the I<old_limit> argument is not NULL, then a successful "
5976 "call to B<prlimit>() places the previous soft and hard limits for "
5977 "I<resource> in the I<rlimit> structure pointed to by I<old_limit>."
5980 #. FIXME . this permission check is strange
5981 #. Asked about this on LKML, 7 Nov 2010
5982 #. "Inconsistent credential checking in prlimit() syscall"
5984 #: build/C/man2/getrlimit.2:461
5986 "The I<pid> argument specifies the ID of the process on which the call is to "
5987 "operate. If I<pid> is 0, then the call applies to the calling process. To "
5988 "set or get the resources of a process other than itself, the caller must "
5989 "have the B<CAP_SYS_RESOURCE> capability, or the real, effective, and saved "
5990 "set user IDs of the target process must match the real user ID of the caller "
5991 "I<and> the real, effective, and saved set group IDs of the target process "
5992 "must match the real group ID of the caller."
5996 #: build/C/man2/getrlimit.2:466
5998 "On success, these system calls return 0. On error, -1 is returned, and "
5999 "I<errno> is set appropriately."
6003 #: build/C/man2/getrlimit.2:471
6005 "A pointer argument points to a location outside the accessible address "
6010 #: build/C/man2/getrlimit.2:483
6012 "The value specified in I<resource> is not valid; or, for B<setrlimit>() or "
6013 "B<prlimit>(): I<rlim-E<gt>rlim_cur> was greater than I<rlim-E<gt>rlim_max>."
6017 #: build/C/man2/getrlimit.2:488
6019 "An unprivileged process tried to raise the hard limit; the "
6020 "B<CAP_SYS_RESOURCE> capability is required to do this."
6024 #: build/C/man2/getrlimit.2:496
6026 "The caller tried to increase the hard B<RLIMIT_NOFILE> limit above the "
6027 "maximum defined by I</proc/sys/fs/nr_open> (see B<proc>(5))."
6031 #: build/C/man2/getrlimit.2:502
6033 "(B<prlimit>()) The calling process did not have permission to set limits "
6034 "for the process specified by I<pid>."
6038 #: build/C/man2/getrlimit.2:506
6039 msgid "Could not find a process with the ID specified in I<pid>."
6043 #: build/C/man2/getrlimit.2:511
6045 "The B<prlimit>() system call is available since Linux 2.6.36. Library "
6046 "support is available since glibc 2.13."
6050 #: build/C/man2/getrlimit.2:515
6051 msgid "B<getrlimit>(), B<setrlimit>(): SVr4, 4.3BSD, POSIX.1-2001."
6055 #: build/C/man2/getrlimit.2:518
6056 msgid "B<prlimit>(): Linux-specific."
6060 #: build/C/man2/getrlimit.2:534
6062 "B<RLIMIT_MEMLOCK> and B<RLIMIT_NPROC> derive from BSD and are not specified "
6063 "in POSIX.1-2001; they are present on the BSDs and Linux, but on few other "
6064 "implementations. B<RLIMIT_RSS> derives from BSD and is not specified in "
6065 "POSIX.1-2001; it is nevertheless present on most implementations. "
6066 "B<RLIMIT_MSGQUEUE>, B<RLIMIT_NICE>, B<RLIMIT_RTPRIO>, B<RLIMIT_RTTIME>, and "
6067 "B<RLIMIT_SIGPENDING> are Linux-specific."
6071 #: build/C/man2/getrlimit.2:540
6073 "A child process created via B<fork>(2) inherits its parent's resource "
6074 "limits. Resource limits are preserved across B<execve>(2)."
6078 #: build/C/man2/getrlimit.2:545
6080 "Lowering the soft limit for a resource below the process's current "
6081 "consumption of that resource will succeed (but will prevent the process from "
6082 "further increasing its consumption of the resource)."
6086 #: build/C/man2/getrlimit.2:554
6088 "One can set the resource limits of the shell using the built-in I<ulimit> "
6089 "command (I<limit> in B<csh>(1)). The shell's resource limits are inherited "
6090 "by the processes that it creates to execute commands."
6094 #: build/C/man2/getrlimit.2:559
6096 "Since Linux 2.6.24, the resource limits of any process can be inspected via "
6097 "I</proc/[pid]/limits>; see B<proc>(5)."
6101 #: build/C/man2/getrlimit.2:568
6103 "Ancient systems provided a B<vlimit>() function with a similar purpose to "
6104 "B<setrlimit>(). For backward compatibility, glibc also provides "
6105 "B<vlimit>(). All new applications should be written using B<setrlimit>()."
6109 #: build/C/man2/getrlimit.2:568
6111 msgid "C library/ kernel ABI differences"
6115 #: build/C/man2/getrlimit.2:577
6117 "Since version 2.13, the glibc B<getrlimit>() and B<setrlimit>() wrapper "
6118 "functions no longer invoke the corresponding system calls, but instead "
6119 "employ B<prlimit>(), for the reasons described in BUGS."
6123 #: build/C/man2/getrlimit.2:586
6125 "In older Linux kernels, the B<SIGXCPU> and B<SIGKILL> signals delivered when "
6126 "a process encountered the soft and hard B<RLIMIT_CPU> limits were delivered "
6127 "one (CPU) second later than they should have been. This was fixed in kernel "
6131 #. see http://marc.theaimsgroup.com/?l=linux-kernel&m=114008066530167&w=2
6133 #: build/C/man2/getrlimit.2:594
6135 "In 2.6.x kernels before 2.6.17, a B<RLIMIT_CPU> limit of 0 is wrongly "
6136 "treated as \"no limit\" (like B<RLIM_INFINITY>). Since Linux 2.6.17, "
6137 "setting a limit of 0 does have an effect, but is actually treated as a limit "
6141 #. See https://lwn.net/Articles/145008/
6143 #: build/C/man2/getrlimit.2:599
6145 "A kernel bug means that B<RLIMIT_RTPRIO> does not work in kernel 2.6.12; the "
6146 "problem is fixed in kernel 2.6.13."
6149 #. see http://marc.theaimsgroup.com/?l=linux-kernel&m=112256338703880&w=2
6151 #: build/C/man2/getrlimit.2:610
6153 "In kernel 2.6.12, there was an off-by-one mismatch between the priority "
6154 "ranges returned by B<getpriority>(2) and B<RLIMIT_NICE>. This had the "
6155 "effect that the actual ceiling for the nice value was calculated as I<19\\ "
6156 "-\\ rlim_cur>. This was fixed in kernel 2.6.13."
6159 #. The relevant patch, sent to LKML, seems to be
6160 #. http://thread.gmane.org/gmane.linux.kernel/273462
6161 #. From: Roland McGrath <roland <at> redhat.com>
6162 #. Subject: [PATCH 7/7] make RLIMIT_CPU/SIGXCPU per-process
6163 #. Date: 2005-01-23 23:27:46 GMT
6164 #. Tested Solaris 10, FreeBSD 9, OpenBSD 5.0
6165 #. FIXME . https://bugzilla.kernel.org/show_bug.cgi?id=50951
6167 #: build/C/man2/getrlimit.2:637
6169 "Since Linux 2.6.12, if a process reaches its soft B<RLIMIT_CPU> limit and "
6170 "has a handler installed for B<SIGXCPU>, then, in addition to invoking the "
6171 "signal handler, the kernel increases the soft limit by one second. This "
6172 "behavior repeats if the process continues to consume CPU time, until the "
6173 "hard limit is reached, at which point the process is killed. Other "
6174 "implementations do not change the B<RLIMIT_CPU> soft limit in this manner, "
6175 "and the Linux behavior is probably not standards conformant; portable "
6176 "applications should avoid relying on this Linux-specific behavior. The "
6177 "Linux-specific B<RLIMIT_RTTIME> limit exhibits the same behavior when the "
6178 "soft limit is encountered."
6182 #: build/C/man2/getrlimit.2:647
6184 "Kernels before 2.4.22 did not diagnose the error B<EINVAL> for "
6185 "B<setrlimit>() when I<rlim-E<gt>rlim_cur> was greater than "
6186 "I<rlim-E<gt>rlim_max>."
6190 #: build/C/man2/getrlimit.2:647
6192 msgid "Representation of \"large\" resource limit values on 32-bit platforms"
6195 #. https://bugzilla.kernel.org/show_bug.cgi?id=5042
6196 #. http://sources.redhat.com/bugzilla/show_bug.cgi?id=12201
6198 #: build/C/man2/getrlimit.2:678
6200 "The glibc B<getrlimit>() and B<setrlimit>() wrapper functions use a 64-bit "
6201 "I<rlim_t> data type, even on 32-bit platforms. However, the I<rlim_t> data "
6202 "type used in the B<getrlimit>() and B<setrlimit>() system calls is a "
6203 "(32-bit) I<unsigned long>. Furthermore, in Linux versions before 2.6.36, "
6204 "the kernel represents resource limits on 32-bit platforms as I<unsigned "
6205 "long>. However, a 32-bit data type is not wide enough. The most pertinent "
6206 "limit here is B<RLIMIT_FSIZE>, which specifies the maximum size to which a "
6207 "file can grow: to be useful, this limit must be represented using a type "
6208 "that is as wide as the type used to represent file offsets\\(emthat is, as "
6209 "wide as a 64-bit B<off_t> (assuming a program compiled with "
6210 "I<_FILE_OFFSET_BITS=64>)."
6214 #: build/C/man2/getrlimit.2:688
6216 "To work around this kernel limitation, if a program tried to set a resource "
6217 "limit to a value larger than can be represented in a 32-bit I<unsigned "
6218 "long>, then the glibc B<setrlimit>() wrapper function silently converted "
6219 "the limit value to B<RLIM_INFINITY>. In other words, the requested resource "
6220 "limit setting was silently ignored."
6224 #: build/C/man2/getrlimit.2:690
6225 msgid "This problem was addressed in Linux 2.6.36 with two principal changes:"
6229 #: build/C/man2/getrlimit.2:693
6231 "the addition of a new kernel representation of resource limits that uses 64 "
6232 "bits, even on 32-bit platforms;"
6236 #: build/C/man2/getrlimit.2:697
6238 "the addition of the B<prlimit>() system call, which employs 64-bit values "
6239 "for its resource limit arguments."
6242 #. https://www.sourceware.org/bugzilla/show_bug.cgi?id=12201
6244 #: build/C/man2/getrlimit.2:710
6246 "Since version 2.13, glibc works around the limitations of the B<getrlimit>() "
6247 "and B<setrlimit>() system calls by implementing B<setrlimit>() and "
6248 "B<getrlimit>() as wrapper functions that call B<prlimit>()."
6252 #: build/C/man2/getrlimit.2:713
6253 msgid "The program below demonstrates the use of B<prlimit>()."
6257 #: build/C/man2/getrlimit.2:722
6260 "#define _GNU_SOURCE\n"
6261 "#define _FILE_OFFSET_BITS 64\n"
6262 "#include E<lt>stdio.hE<gt>\n"
6263 "#include E<lt>time.hE<gt>\n"
6264 "#include E<lt>stdlib.hE<gt>\n"
6265 "#include E<lt>unistd.hE<gt>\n"
6266 "#include E<lt>sys/resource.hE<gt>\n"
6270 #: build/C/man2/getrlimit.2:725
6273 "#define errExit(msg) \tdo { perror(msg); exit(EXIT_FAILURE); \\e\n"
6278 #: build/C/man2/getrlimit.2:732
6282 "main(int argc, char *argv[])\n"
6284 " struct rlimit old, new;\n"
6285 " struct rlimit *newp;\n"
6290 #: build/C/man2/getrlimit.2:738
6293 " if (!(argc == 2 || argc == 4)) {\n"
6294 " fprintf(stderr, \"Usage: %s E<lt>pidE<gt> [E<lt>new-soft-limitE<gt> "
6296 " \"E<lt>new-hard-limitE<gt>]\\en\", argv[0]);\n"
6297 " exit(EXIT_FAILURE);\n"
6302 #: build/C/man2/getrlimit.2:740
6304 msgid " pid = atoi(argv[1]); /* PID of target process */\n"
6308 #: build/C/man2/getrlimit.2:747
6312 " if (argc == 4) {\n"
6313 " new.rlim_cur = atoi(argv[2]);\n"
6314 " new.rlim_max = atoi(argv[3]);\n"
6320 #: build/C/man2/getrlimit.2:750
6323 " /* Set CPU time limit of target process; retrieve and display\n"
6324 " previous limit */\n"
6328 #: build/C/man2/getrlimit.2:755
6331 " if (prlimit(pid, RLIMIT_CPU, newp, &old) == -1)\n"
6332 " errExit(\"prlimit-1\");\n"
6333 " printf(\"Previous limits: soft=%lld; hard=%lld\\en\",\n"
6334 " (long long) old.rlim_cur, (long long) old.rlim_max);\n"
6338 #: build/C/man2/getrlimit.2:757
6340 msgid " /* Retrieve and display new CPU time limit */\n"
6344 #: build/C/man2/getrlimit.2:762
6347 " if (prlimit(pid, RLIMIT_CPU, NULL, &old) == -1)\n"
6348 " errExit(\"prlimit-2\");\n"
6349 " printf(\"New limits: soft=%lld; hard=%lld\\en\",\n"
6350 " (long long) old.rlim_cur, (long long) old.rlim_max);\n"
6354 #: build/C/man2/getrlimit.2:765 build/C/man7/user_namespaces.7:829
6357 " exit(EXIT_FAILURE);\n"
6362 #: build/C/man2/getrlimit.2:783
6364 "B<prlimit>(1), B<dup>(2), B<fcntl>(2), B<fork>(2), B<getrusage>(2), "
6365 "B<mlock>(2), B<mmap>(2), B<open>(2), B<quotactl>(2), B<sbrk>(2), "
6366 "B<shmctl>(2), B<malloc>(3), B<sigqueue>(3), B<ulimit>(3), B<core>(5), "
6367 "B<capabilities>(7), B<signal>(7)"
6371 #: build/C/man2/getrusage.2:39
6377 #: build/C/man2/getrusage.2:39
6383 #: build/C/man2/getrusage.2:42
6384 msgid "getrusage - get resource usage"
6388 #: build/C/man2/getrusage.2:48
6389 msgid "B<int getrusage(int >I<who>B<, struct rusage *>I<usage>B<);>"
6393 #: build/C/man2/getrusage.2:54
6395 "B<getrusage>() returns resource usage measures for I<who>, which can be one "
6400 #: build/C/man2/getrusage.2:54
6402 msgid "B<RUSAGE_SELF>"
6406 #: build/C/man2/getrusage.2:58
6408 "Return resource usage statistics for the calling process, which is the sum "
6409 "of resources used by all threads in the process."
6413 #: build/C/man2/getrusage.2:58
6415 msgid "B<RUSAGE_CHILDREN>"
6419 #: build/C/man2/getrusage.2:65
6421 "Return resource usage statistics for all children of the calling process "
6422 "that have terminated and been waited for. These statistics will include the "
6423 "resources used by grandchildren, and further removed descendants, if all of "
6424 "the intervening descendants waited on their terminated children."
6428 #: build/C/man2/getrusage.2:65
6430 msgid "B<RUSAGE_THREAD> (since Linux 2.6.26)"
6434 #: build/C/man2/getrusage.2:75
6436 "Return resource usage statistics for the calling thread. The B<_GNU_SOURCE> "
6437 "feature test macro must be defined (before including I<any> header file) in "
6438 "order to obtain the definition of this constant from "
6439 "I<E<lt>sys/resource.hE<gt>>."
6443 #: build/C/man2/getrusage.2:79
6445 "The resource usages are returned in the structure pointed to by I<usage>, "
6446 "which has the following form:"
6450 #: build/C/man2/getrusage.2:100
6454 " struct timeval ru_utime; /* user CPU time used */\n"
6455 " struct timeval ru_stime; /* system CPU time used */\n"
6456 " long ru_maxrss; /* maximum resident set size */\n"
6457 " long ru_ixrss; /* integral shared memory size */\n"
6458 " long ru_idrss; /* integral unshared data size */\n"
6459 " long ru_isrss; /* integral unshared stack size */\n"
6460 " long ru_minflt; /* page reclaims (soft page faults) */\n"
6461 " long ru_majflt; /* page faults (hard page faults) */\n"
6462 " long ru_nswap; /* swaps */\n"
6463 " long ru_inblock; /* block input operations */\n"
6464 " long ru_oublock; /* block output operations */\n"
6465 " long ru_msgsnd; /* IPC messages sent */\n"
6466 " long ru_msgrcv; /* IPC messages received */\n"
6467 " long ru_nsignals; /* signals received */\n"
6468 " long ru_nvcsw; /* voluntary context switches */\n"
6469 " long ru_nivcsw; /* involuntary context switches */\n"
6474 #: build/C/man2/getrusage.2:108
6476 "Not all fields are completed; unmaintained fields are set to zero by the "
6477 "kernel. (The unmaintained fields are provided for compatibility with other "
6478 "systems, and because they may one day be supported on Linux.) The fields "
6479 "are interpreted as follows:"
6483 #: build/C/man2/getrusage.2:108
6489 #: build/C/man2/getrusage.2:114
6491 "This is the total amount of time spent executing in user mode, expressed in "
6492 "a I<timeval> structure (seconds plus microseconds)."
6496 #: build/C/man2/getrusage.2:114
6502 #: build/C/man2/getrusage.2:120
6504 "This is the total amount of time spent executing in kernel mode, expressed "
6505 "in a I<timeval> structure (seconds plus microseconds)."
6509 #: build/C/man2/getrusage.2:120
6511 msgid "I<ru_maxrss> (since Linux 2.6.32)"
6515 #: build/C/man2/getrusage.2:127
6517 "This is the maximum resident set size used (in kilobytes). For "
6518 "B<RUSAGE_CHILDREN>, this is the resident set size of the largest child, not "
6519 "the maximum resident set size of the process tree."
6523 #: build/C/man2/getrusage.2:127
6525 msgid "I<ru_ixrss> (unmaintained)"
6528 #. On some systems, this field records the number of signals received.
6530 #: build/C/man2/getrusage.2:133 build/C/man2/getrusage.2:138 build/C/man2/getrusage.2:143 build/C/man2/getrusage.2:155 build/C/man2/getrusage.2:167 build/C/man2/getrusage.2:173 build/C/man2/getrusage.2:177
6531 msgid "This field is currently unused on Linux."
6535 #: build/C/man2/getrusage.2:133
6537 msgid "I<ru_idrss> (unmaintained)"
6541 #: build/C/man2/getrusage.2:138
6543 msgid "I<ru_isrss> (unmaintained)"
6547 #: build/C/man2/getrusage.2:143
6549 msgid "I<ru_minflt>"
6553 #: build/C/man2/getrusage.2:148
6555 "The number of page faults serviced without any I/O activity; here I/O "
6556 "activity is avoided by ``reclaiming'' a page frame from the list of pages "
6557 "awaiting reallocation."
6561 #: build/C/man2/getrusage.2:148
6563 msgid "I<ru_majflt>"
6567 #: build/C/man2/getrusage.2:151
6568 msgid "The number of page faults serviced that required I/O activity."
6572 #: build/C/man2/getrusage.2:151
6574 msgid "I<ru_nswap> (unmaintained)"
6578 #: build/C/man2/getrusage.2:155
6580 msgid "I<ru_inblock> (since Linux 2.6.22)"
6584 #: build/C/man2/getrusage.2:158
6585 msgid "The number of times the filesystem had to perform input."
6589 #: build/C/man2/getrusage.2:158
6591 msgid "I<ru_oublock> (since Linux 2.6.22)"
6595 #: build/C/man2/getrusage.2:161
6596 msgid "The number of times the filesystem had to perform output."
6600 #: build/C/man2/getrusage.2:161
6602 msgid "I<ru_msgsnd> (unmaintained)"
6606 #: build/C/man2/getrusage.2:167
6608 msgid "I<ru_msgrcv> (unmaintained)"
6612 #: build/C/man2/getrusage.2:173
6614 msgid "I<ru_nsignals> (unmaintained)"
6618 #: build/C/man2/getrusage.2:177
6620 msgid "I<ru_nvcsw> (since Linux 2.6)"
6624 #: build/C/man2/getrusage.2:182
6626 "The number of times a context switch resulted due to a process voluntarily "
6627 "giving up the processor before its time slice was completed (usually to "
6628 "await availability of a resource)."
6632 #: build/C/man2/getrusage.2:182
6634 msgid "I<ru_nivcsw> (since Linux 2.6)"
6638 #: build/C/man2/getrusage.2:187
6640 "The number of times a context switch resulted due to a higher priority "
6641 "process becoming runnable or because the current process exceeded its time "
6646 #: build/C/man2/getrusage.2:198
6647 msgid "I<usage> points outside the accessible address space."
6651 #: build/C/man2/getrusage.2:202
6652 msgid "I<who> is invalid."
6656 #: build/C/man2/getrusage.2:210
6658 "SVr4, 4.3BSD. POSIX.1-2001 specifies B<getrusage>(), but specifies only the "
6659 "fields I<ru_utime> and I<ru_stime>."
6663 #: build/C/man2/getrusage.2:213
6664 msgid "B<RUSAGE_THREAD> is Linux-specific."
6668 #: build/C/man2/getrusage.2:216
6669 msgid "Resource usage metrics are preserved across an B<execve>(2)."
6673 #: build/C/man2/getrusage.2:224
6675 "Including I<E<lt>sys/time.hE<gt>> is not required these days, but increases "
6676 "portability. (Indeed, I<struct timeval> is defined in "
6677 "I<E<lt>sys/time.hE<gt>>.)"
6680 #. See the description of getrusage() in XSH.
6681 #. A similar statement was also in SUSv2.
6683 #: build/C/man2/getrusage.2:236
6685 "In Linux kernel versions before 2.6.9, if the disposition of B<SIGCHLD> is "
6686 "set to B<SIG_IGN> then the resource usages of child processes are "
6687 "automatically included in the value returned by B<RUSAGE_CHILDREN>, although "
6688 "POSIX.1-2001 explicitly prohibits this. This nonconformance is rectified in "
6689 "Linux 2.6.9 and later."
6693 #: build/C/man2/getrusage.2:239
6695 "The structure definition shown at the start of this page was taken from "
6700 #: build/C/man2/getrusage.2:248
6702 "Ancient systems provided a B<vtimes>() function with a similar purpose to "
6703 "B<getrusage>(). For backward compatibility, glibc also provides "
6704 "B<vtimes>(). All new applications should be written using B<getrusage>()."
6708 #: build/C/man2/getrusage.2:253
6709 msgid "See also the description of I</proc/PID/stat> in B<proc>(5)."
6713 #: build/C/man2/getrusage.2:259
6715 "B<clock_gettime>(2), B<getrlimit>(2), B<times>(2), B<wait>(2), B<wait4>(2), "
6720 #: build/C/man2/getsid.2:26
6726 #: build/C/man2/getsid.2:26
6732 #: build/C/man2/getsid.2:29
6733 msgid "getsid - get session ID"
6737 #: build/C/man2/getsid.2:33
6738 msgid "B<pid_t getsid(pid_t>I< pid>B<);>"
6742 #: build/C/man2/getsid.2:42
6743 msgid "B<getsid>():"
6747 #: build/C/man2/getsid.2:45 build/C/man2/setpgid.2:79
6748 msgid "_XOPEN_SOURCE\\ E<gt>=\\ 500 || _XOPEN_SOURCE\\ &&\\ _XOPEN_SOURCE_EXTENDED"
6752 #: build/C/man2/getsid.2:47 build/C/man2/setpgid.2:81
6753 msgid "|| /* Since glibc 2.12: */ _POSIX_C_SOURCE\\ E<gt>=\\ 200809L"
6757 #: build/C/man2/getsid.2:58
6759 "I<getsid(0)> returns the session ID of the calling process. I<getsid(p)> "
6760 "returns the session ID of the process with process ID I<p>. (The session ID "
6761 "of a process is the process group ID of the session leader.)"
6765 #: build/C/man2/getsid.2:63
6767 "On success, a session ID is returned. On error, I<(pid_t)\\ -1> will be "
6768 "returned, and I<errno> is set appropriately."
6772 #: build/C/man2/getsid.2:70
6774 "A process with process ID I<p> exists, but it is not in the same session as "
6775 "the calling process, and the implementation considers this an error."
6779 #: build/C/man2/getsid.2:75
6780 msgid "No process with process ID I<p> was found."
6783 #. Linux has this system call since Linux 1.3.44.
6784 #. There is libc support since libc 5.2.19.
6786 #: build/C/man2/getsid.2:79
6787 msgid "This system call is available on Linux since version 2.0."
6791 #: build/C/man2/getsid.2:81 build/C/man2/setgid.2:73 build/C/man2/setsid.2:70
6792 msgid "SVr4, POSIX.1-2001."
6796 #: build/C/man2/getsid.2:84
6797 msgid "Linux does not return B<EPERM>."
6801 #: build/C/man2/getsid.2:87
6802 msgid "B<getpgid>(2), B<setsid>(2), B<credentials>(7)"
6806 #: build/C/man2/getuid.2:26
6812 #: build/C/man2/getuid.2:29
6813 msgid "getuid, geteuid - get user identity"
6817 #: build/C/man2/getuid.2:35
6818 msgid "B<uid_t getuid(void);>"
6822 #: build/C/man2/getuid.2:37
6823 msgid "B<uid_t geteuid(void);>"
6827 #: build/C/man2/getuid.2:40
6828 msgid "B<getuid>() returns the real user ID of the calling process."
6832 #: build/C/man2/getuid.2:43
6833 msgid "B<geteuid>() returns the effective user ID of the calling process."
6837 #: build/C/man2/getuid.2:48
6843 #: build/C/man2/getuid.2:57
6845 "In UNIX\\ V6 the B<getuid>() call returned I<(euid E<lt>E<lt> 8) + uid>. "
6846 "UNIX\\ V7 introduced separate calls B<getuid>() and B<geteuid>()."
6850 #: build/C/man2/getuid.2:73
6852 "The original Linux B<getuid>() and B<geteuid>() system calls supported "
6853 "only 16-bit user IDs. Subsequently, Linux 2.4 added B<getuid32>() and "
6854 "B<geteuid32>(), supporting 32-bit IDs. The glibc B<getuid>() and "
6855 "B<geteuid>() wrapper functions transparently deal with the variations "
6856 "across kernel versions."
6860 #: build/C/man2/getuid.2:77
6861 msgid "B<getresuid>(2), B<setreuid>(2), B<setuid>(2), B<credentials>(7)"
6865 #: build/C/man3/group_member.3:25
6867 msgid "GROUP_MEMBER"
6871 #: build/C/man3/group_member.3:25
6877 #: build/C/man3/group_member.3:25
6883 #: build/C/man3/group_member.3:28
6884 msgid "group_member - test whether a process is in a group"
6888 #: build/C/man3/group_member.3:32
6889 msgid "B<int group_member(gid_t >I<gid>B<);>"
6893 #: build/C/man3/group_member.3:40
6894 msgid "B<group_member>(): _GNU_SOURCE"
6898 #: build/C/man3/group_member.3:48
6900 "The B<group_member>() function tests whether any of the caller's "
6901 "supplementary group IDs (as returned by B<getgroups>(2)) matches I<gid>."
6905 #: build/C/man3/group_member.3:55
6907 "The B<group_member>() function returns nonzero if any of the caller's "
6908 "supplementary group IDs matches I<gid>, and zero otherwise."
6912 #: build/C/man3/group_member.3:57
6913 msgid "This function is a nonstandard GNU extension."
6917 #: build/C/man3/group_member.3:61
6918 msgid "B<getgid>(2), B<getgroups>(2), B<getgrouplist>(3), B<group>(5)"
6922 #: build/C/man2/iopl.2:33
6928 #: build/C/man2/iopl.2:33
6934 #: build/C/man2/iopl.2:36
6935 msgid "iopl - change I/O privilege level"
6939 #: build/C/man2/iopl.2:38
6940 msgid "B<#include E<lt>sys/io.hE<gt>>"
6944 #: build/C/man2/iopl.2:40
6945 msgid "B<int iopl(int >I<level>B<);>"
6949 #: build/C/man2/iopl.2:45
6951 "B<iopl>() changes the I/O privilege level of the calling process, as "
6952 "specified by the two least significant bits in I<level>."
6956 #: build/C/man2/iopl.2:51
6958 "This call is necessary to allow 8514-compatible X servers to run under "
6959 "Linux. Since these X servers require access to all 65536 I/O ports, the "
6960 "B<ioperm>(2) call is not sufficient."
6964 #: build/C/man2/iopl.2:55
6966 "In addition to granting unrestricted I/O port access, running at a higher "
6967 "I/O privilege level also allows the process to disable interrupts. This "
6968 "will probably crash the system, and is not recommended."
6972 #: build/C/man2/iopl.2:60
6973 msgid "Permissions are inherited by B<fork>(2) and B<execve>(2)."
6977 #: build/C/man2/iopl.2:62
6978 msgid "The I/O privilege level for a normal process is 0."
6982 #: build/C/man2/iopl.2:66
6984 "This call is mostly for the i386 architecture. On many other architectures "
6985 "it does not exist or will always return an error."
6989 #: build/C/man2/iopl.2:76
6990 msgid "I<level> is greater than 3."
6994 #: build/C/man2/iopl.2:79
6995 msgid "This call is unimplemented."
6999 #: build/C/man2/iopl.2:87
7001 "The calling process has insufficient privilege to call B<iopl>(); the "
7002 "B<CAP_SYS_RAWIO> capability is required to raise the I/O privilege level "
7003 "above its current value."
7007 #: build/C/man2/iopl.2:91
7009 "B<iopl>() is Linux-specific and should not be used in programs that are "
7010 "intended to be portable."
7014 #: build/C/man2/iopl.2:100
7016 "Libc5 treats it as a system call and has a prototype in "
7017 "I<E<lt>unistd.hE<gt>>. Glibc1 does not have a prototype. Glibc2 has a "
7018 "prototype both in I<E<lt>sys/io.hE<gt>> and in I<E<lt>sys/perm.hE<gt>>. "
7019 "Avoid the latter; it is available on i386 only."
7023 #: build/C/man2/iopl.2:103
7024 msgid "B<ioperm>(2), B<outb>(2), B<capabilities>(7)"
7028 #: build/C/man2/ioprio_set.2:24
7034 #: build/C/man2/ioprio_set.2:24
7040 #: build/C/man2/ioprio_set.2:27
7041 msgid "ioprio_get, ioprio_set - get/set I/O scheduling class and priority"
7045 #: build/C/man2/ioprio_set.2:31
7048 "B<int ioprio_get(int >I<which>B<, int >I<who>B<);>\n"
7049 "B<int ioprio_set(int >I<which>B<, int >I<who>B<, int >I<ioprio>B<);>\n"
7053 #: build/C/man2/ioprio_set.2:35
7054 msgid "I<Note>: There are no glibc wrappers for these system calls; see NOTES."
7058 #: build/C/man2/ioprio_set.2:42
7060 "The B<ioprio_get>() and B<ioprio_set>() system calls respectively get and "
7061 "set the I/O scheduling class and priority of one or more threads."
7065 #: build/C/man2/ioprio_set.2:54
7067 "The I<which> and I<who> arguments identify the thread(s) on which the system "
7068 "calls operate. The I<which> argument determines how I<who> is interpreted, "
7069 "and has one of the following values:"
7073 #: build/C/man2/ioprio_set.2:54
7075 msgid "B<IOPRIO_WHO_PROCESS>"
7079 #: build/C/man2/ioprio_set.2:61
7081 "I<who> is a process ID or thread ID identifying a single process or thread. "
7082 "If I<who> is 0, then operate on the calling thread."
7086 #: build/C/man2/ioprio_set.2:61
7088 msgid "B<IOPRIO_WHO_PGRP>"
7092 #: build/C/man2/ioprio_set.2:68
7094 "I<who> is a process group ID identifying all the members of a process "
7095 "group. If I<who> is 0, then operate on the process group of which the "
7096 "caller is a member."
7100 #: build/C/man2/ioprio_set.2:68
7102 msgid "B<IOPRIO_WHO_USER>"
7105 #. FIXME . Need to document the behavior when 'who' is specified as 0
7106 #. See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=652443
7108 #: build/C/man2/ioprio_set.2:75
7110 "I<who> is a user ID identifying all of the processes that have a matching "
7115 #: build/C/man2/ioprio_set.2:98
7117 "If I<which> is specified as B<IOPRIO_WHO_PGRP> or B<IOPRIO_WHO_USER> when "
7118 "calling B<ioprio_get>(), and more than one process matches I<who>, then the "
7119 "returned priority will be the highest one found among all of the matching "
7120 "processes. One priority is said to be higher than another one if it belongs "
7121 "to a higher priority class (B<IOPRIO_CLASS_RT> is the highest priority "
7122 "class; B<IOPRIO_CLASS_IDLE> is the lowest) or if it belongs to the same "
7123 "priority class as the other process but has a higher priority level (a lower "
7124 "priority number means a higher priority level)."
7128 #: build/C/man2/ioprio_set.2:108
7130 "The I<ioprio> argument given to B<ioprio_set>() is a bit mask that "
7131 "specifies both the scheduling class and the priority to be assigned to the "
7132 "target process(es). The following macros are used for assembling and "
7133 "dissecting I<ioprio> values:"
7137 #: build/C/man2/ioprio_set.2:108
7139 msgid "B<IOPRIO_PRIO_VALUE(>I<class>B<, >I<data>B<)>"
7143 #: build/C/man2/ioprio_set.2:117
7145 "Given a scheduling I<class> and priority (I<data>), this macro combines the "
7146 "two values to produce an I<ioprio> value, which is returned as the result of "
7151 #: build/C/man2/ioprio_set.2:117
7153 msgid "B<IOPRIO_PRIO_CLASS(>I<mask>B<)>"
7157 #: build/C/man2/ioprio_set.2:129
7159 "Given I<mask> (an I<ioprio> value), this macro returns its I/O class "
7160 "component, that is, one of the values B<IOPRIO_CLASS_RT>, "
7161 "B<IOPRIO_CLASS_BE>, or B<IOPRIO_CLASS_IDLE>."
7165 #: build/C/man2/ioprio_set.2:129
7167 msgid "B<IOPRIO_PRIO_DATA(>I<mask>B<)>"
7171 #: build/C/man2/ioprio_set.2:138
7173 "Given I<mask> (an I<ioprio> value), this macro returns its priority "
7174 "(I<data>) component."
7178 #: build/C/man2/ioprio_set.2:141
7180 "See the NOTES section for more information on scheduling classes and "
7185 #: build/C/man2/ioprio_set.2:149
7187 "I/O priorities are supported for reads and for synchronous (B<O_DIRECT>, "
7188 "B<O_SYNC>) writes. I/O priorities are not supported for asynchronous "
7189 "writes because they are issued outside the context of the program dirtying "
7190 "the memory, and thus program-specific priorities do not apply."
7194 #: build/C/man2/ioprio_set.2:162
7196 "On success, B<ioprio_get>() returns the I<ioprio> value of the process with "
7197 "highest I/O priority of any of the processes that match the criteria "
7198 "specified in I<which> and I<who>. On error, -1 is returned, and I<errno> is "
7199 "set to indicate the error."
7203 #: build/C/man2/ioprio_set.2:169
7205 "On success, B<ioprio_set>() returns 0. On error, -1 is returned, and "
7206 "I<errno> is set to indicate the error."
7210 #: build/C/man2/ioprio_set.2:179
7212 "Invalid value for I<which> or I<ioprio>. Refer to the NOTES section for "
7213 "available scheduler classes and priority levels for I<ioprio>."
7217 #: build/C/man2/ioprio_set.2:187
7219 "The calling process does not have the privilege needed to assign this "
7220 "I<ioprio> to the specified process(es). See the NOTES section for more "
7221 "information on required privileges for B<ioprio_set>()."
7225 #: build/C/man2/ioprio_set.2:193
7227 "No process(es) could be found that matched the specification in I<which> and "
7232 #: build/C/man2/ioprio_set.2:196
7233 msgid "These system calls have been available on Linux since kernel 2.6.13."
7237 #: build/C/man2/ioprio_set.2:201
7239 "Glibc does not provide a wrapper for these system calls; call them using "
7244 #: build/C/man2/ioprio_set.2:220
7246 "Two or more processes or threads can share an I/O context. This will be the "
7247 "case when B<clone>(2) was called with the B<CLONE_IO> flag. However, by "
7248 "default, the distinct threads of a process will B<not> share the same I/O "
7249 "context. This means that if you want to change the I/O priority of all "
7250 "threads in a process, you may need to call B<ioprio_set>() on each of the "
7251 "threads. The thread ID that you would need for this operation is the one "
7252 "that is returned by B<gettid>(2) or B<clone>(2)."
7256 #: build/C/man2/ioprio_set.2:225
7258 "These system calls have an effect only when used in conjunction with an I/O "
7259 "scheduler that supports I/O priorities. As of kernel 2.6.17, the only such "
7260 "scheduler is the Completely Fair Queuing (CFQ) I/O scheduler."
7264 #: build/C/man2/ioprio_set.2:225
7266 msgid "Selecting an I/O scheduler"
7270 #: build/C/man2/ioprio_set.2:229
7272 "I/O Schedulers are selected on a per-device basis via the special file "
7273 "I</sys/block/E<lt>deviceE<gt>/queue/scheduler>."
7277 #: build/C/man2/ioprio_set.2:235
7279 "One can view the current I/O scheduler via the I</sys> filesystem. For "
7280 "example, the following command displays a list of all schedulers currently "
7281 "loaded in the kernel:"
7285 #: build/C/man2/ioprio_set.2:240
7288 "$B< cat /sys/block/hda/queue/scheduler>\n"
7289 "noop anticipatory deadline [cfq]\n"
7293 #: build/C/man2/ioprio_set.2:254
7295 "The scheduler surrounded by brackets is the one actually in use for the "
7296 "device (I<hda> in the example). Setting another scheduler is done by "
7297 "writing the name of the new scheduler to this file. For example, the "
7298 "following command will set the scheduler for the I<hda> device to I<cfq>:"
7302 #: build/C/man2/ioprio_set.2:260
7307 "#B< echo cfq E<gt> /sys/block/hda/queue/scheduler>\n"
7311 #: build/C/man2/ioprio_set.2:262
7313 msgid "The Completely Fair Queuing (CFQ) I/O scheduler"
7317 #: build/C/man2/ioprio_set.2:268
7319 "Since v3 (aka CFQ Time Sliced) CFQ implements I/O nice levels similar to "
7320 "those of CPU scheduling. These nice levels are grouped in three scheduling "
7321 "classes each one containing one or more priority levels:"
7325 #: build/C/man2/ioprio_set.2:268
7327 msgid "B<IOPRIO_CLASS_RT> (1)"
7331 #: build/C/man2/ioprio_set.2:283
7333 "This is the real-time I/O class. This scheduling class is given higher "
7334 "priority than any other class: processes from this class are given first "
7335 "access to the disk every time. Thus this I/O class needs to be used with "
7336 "some care: one I/O real-time process can starve the entire system. Within "
7337 "the real-time class, there are 8 levels of class data (priority) that "
7338 "determine exactly how much time this process needs the disk for on each "
7339 "service. The highest real-time priority level is 0; the lowest is 7. In "
7340 "the future this might change to be more directly mappable to performance, by "
7341 "passing in a desired data rate instead."
7345 #: build/C/man2/ioprio_set.2:283
7347 msgid "B<IOPRIO_CLASS_BE> (2)"
7351 #: build/C/man2/ioprio_set.2:296
7353 "This is the best-effort scheduling class, which is the default for any "
7354 "process that hasn't set a specific I/O priority. The class data (priority) "
7355 "determines how much I/O bandwidth the process will get. Best-effort "
7356 "priority levels are analogous to CPU nice values (see B<getpriority>(2)). "
7357 "The priority level determines a priority relative to other processes in the "
7358 "best-effort scheduling class. Priority levels range from 0 (highest) to 7 "
7363 #: build/C/man2/ioprio_set.2:296
7365 msgid "B<IOPRIO_CLASS_IDLE> (3)"
7369 #: build/C/man2/ioprio_set.2:305
7371 "This is the idle scheduling class. Processes running at this level only get "
7372 "I/O time when no one else needs the disk. The idle class has no class "
7373 "data. Attention is required when assigning this priority class to a "
7374 "process, since it may become starved if higher priority processes are "
7375 "constantly accessing the disk."
7379 #: build/C/man2/ioprio_set.2:309
7381 "Refer to I<Documentation/block/ioprio.txt> for more information on the CFQ "
7382 "I/O Scheduler and an example program."
7386 #: build/C/man2/ioprio_set.2:309
7388 msgid "Required permissions to set I/O priorities"
7392 #: build/C/man2/ioprio_set.2:312
7394 "Permission to change a process's priority is granted or denied based on two "
7399 #: build/C/man2/ioprio_set.2:312
7401 msgid "B<Process ownership>"
7405 #: build/C/man2/ioprio_set.2:320
7407 "An unprivileged process may set only the I/O priority of a process whose "
7408 "real UID matches the real or effective UID of the calling process. A "
7409 "process which has the B<CAP_SYS_NICE> capability can change the priority of "
7414 #: build/C/man2/ioprio_set.2:320
7416 msgid "B<What is the desired priority>"
7420 #: build/C/man2/ioprio_set.2:332
7422 "Attempts to set very high priorities (B<IOPRIO_CLASS_RT>) require the "
7423 "B<CAP_SYS_ADMIN> capability. Kernel versions up to 2.6.24 also required "
7424 "B<CAP_SYS_ADMIN> to set a very low priority (B<IOPRIO_CLASS_IDLE>), but "
7425 "since Linux 2.6.25, this is no longer required."
7429 #: build/C/man2/ioprio_set.2:337
7431 "A call to B<ioprio_set>() must follow both rules, or the call will fail "
7432 "with the error B<EPERM>."
7435 #. 6 May 07: Bug report raised:
7436 #. http://sources.redhat.com/bugzilla/show_bug.cgi?id=4464
7437 #. Ulrich Drepper replied that he wasn't going to add these
7440 #: build/C/man2/ioprio_set.2:346
7442 "Glibc does not yet provide a suitable header file defining the function "
7443 "prototypes and macros described on this page. Suitable definitions can be "
7444 "found in I<linux/ioprio.h>."
7448 #: build/C/man2/ioprio_set.2:351
7449 msgid "B<ionice>(1), B<getpriority>(2), B<open>(2), B<capabilities>(7)"
7453 #: build/C/man2/ioprio_set.2:353
7454 msgid "I<Documentation/block/ioprio.txt> in the Linux kernel source tree"
7458 #: build/C/man2/ipc.2:25
7464 #: build/C/man2/ipc.2:25
7470 #: build/C/man2/ipc.2:28
7471 msgid "ipc - System V IPC system calls"
7475 #: build/C/man2/ipc.2:33
7478 "B<int ipc(unsigned int >I<call>B<, int >I<first>B<, int >I<second>B<, int "
7480 "B< void *>I<ptr>B<, long >I<fifth>B<);>\n"
7484 #: build/C/man2/ipc.2:41
7486 "B<ipc>() is a common kernel entry point for the System\\ V IPC calls for "
7487 "messages, semaphores, and shared memory. I<call> determines which IPC "
7488 "function to invoke; the other arguments are passed through to the "
7493 #: build/C/man2/ipc.2:45
7495 "User programs should call the appropriate functions by their usual names. "
7496 "Only standard library implementors and kernel hackers need to know about "
7501 #: build/C/man2/ipc.2:49
7503 "B<ipc>() is Linux-specific, and should not be used in programs intended to "
7508 #: build/C/man2/ipc.2:57
7510 "On some architectures\\(emfor example x86-64 and ARM\\(emthere is no "
7511 "B<ipc>() system call; instead B<msgctl>(2), B<semctl>(2), B<shmctl>(2), and "
7512 "so on really are implemented as separate system calls."
7516 #: build/C/man2/ipc.2:69
7518 "B<msgctl>(2), B<msgget>(2), B<msgrcv>(2), B<msgsnd>(2), B<semctl>(2), "
7519 "B<semget>(2), B<semop>(2), B<semtimedop>(2), B<shmat>(2), B<shmctl>(2), "
7520 "B<shmdt>(2), B<shmget>(2)"
7524 #: build/C/man7/namespaces.7:27
7530 #: build/C/man7/namespaces.7:30
7531 msgid "namespaces - overview of Linux namespaces"
7535 #: build/C/man7/namespaces.7:37
7537 "A namespace wraps a global system resource in an abstraction that makes it "
7538 "appear to the processes within the namespace that they have their own "
7539 "isolated instance of the global resource. Changes to the global resource "
7540 "are visible to other processes that are members of the namespace, but are "
7541 "invisible to other processes. One use of namespaces is to implement "
7546 #: build/C/man7/namespaces.7:39
7547 msgid "Linux provides the following namespaces:"
7551 #: build/C/man7/namespaces.7:42
7553 msgid "Namespace\tConstant\tIsolates\n"
7557 #: build/C/man7/namespaces.7:43
7559 msgid "IPC\tCLONE_NEWIPC\tSystem V IPC, POSIX message queues\n"
7563 #: build/C/man7/namespaces.7:44
7565 msgid "Network\tCLONE_NEWNET\tNetwork devices, stacks, ports, etc.\n"
7569 #: build/C/man7/namespaces.7:45
7571 msgid "Mount\tCLONE_NEWNS\tMount points\n"
7575 #: build/C/man7/namespaces.7:46
7577 msgid "PID\tCLONE_NEWPID\tProcess IDs\n"
7581 #: build/C/man7/namespaces.7:47
7583 msgid "User\tCLONE_NEWUSER\tUser and group IDs\n"
7587 #: build/C/man7/namespaces.7:48
7589 msgid "UTS\tCLONE_NEWUTS\tHostname and NIS domain name\n"
7593 #. ==================== The namespaces API ====================
7595 #: build/C/man7/namespaces.7:57
7597 "This page describes the various namespaces and the associated I</proc> "
7598 "files, and summarizes the APIs for working with namespaces."
7602 #: build/C/man7/namespaces.7:57
7604 msgid "The namespaces API"
7608 #: build/C/man7/namespaces.7:62
7610 "As well as various I</proc> files described below, the namespaces API "
7611 "includes the following system calls:"
7615 #: build/C/man7/namespaces.7:62
7621 #: build/C/man7/namespaces.7:75
7623 "The B<clone>(2) system call creates a new process. If the I<flags> "
7624 "argument of the call specifies one or more of the B<CLONE_NEW*> flags listed "
7625 "below, then new namespaces are created for each flag, and the child process "
7626 "is made a member of those namespaces. (This system call also implements a "
7627 "number of features unrelated to namespaces.)"
7631 #: build/C/man7/namespaces.7:75
7637 #: build/C/man7/namespaces.7:84
7639 "The B<setns>(2) system call allows the calling process to join an existing "
7640 "namespace. The namespace to join is specified via a file descriptor that "
7641 "refers to one of the I</proc/[pid]/ns> files described below."
7645 #: build/C/man7/namespaces.7:84
7647 msgid "B<unshare>(2)"
7651 #: build/C/man7/namespaces.7:97
7653 "The B<unshare>(2) system call moves the calling process to a new "
7654 "namespace. If the I<flags> argument of the call specifies one or more of "
7655 "the B<CLONE_NEW*> flags listed below, then new namespaces are created for "
7656 "each flag, and the calling process is made a member of those namespaces. "
7657 "(This system call also implements a number of features unrelated to "
7662 #. ==================== The /proc/[pid]/ns/ directory ====================
7664 #: build/C/man7/namespaces.7:110
7666 "Creation of new namespaces using B<clone>(2) and B<unshare>(2) in most "
7667 "cases requires the B<CAP_SYS_ADMIN> capability. User namespaces are the "
7668 "exception: since Linux 3.8, no privilege is required to create a user "
7673 #: build/C/man7/namespaces.7:110
7675 msgid "The /proc/[pid]/ns/ directory"
7678 #. See commit 6b4e306aa3dc94a0545eb9279475b1ab6209a31f
7680 #: build/C/man7/namespaces.7:117
7682 "Each process has a I</proc/[pid]/ns/> subdirectory containing one entry for "
7683 "each namespace that supports being manipulated by B<setns>(2):"
7687 #: build/C/man7/namespaces.7:128
7690 "$ B<ls -l /proc/$$/ns>\n"
7692 "lrwxrwxrwx. 1 mtk mtk 0 Jan 14 01:20 ipc -E<gt> ipc:[4026531839]\n"
7693 "lrwxrwxrwx. 1 mtk mtk 0 Jan 14 01:20 mnt -E<gt> mnt:[4026531840]\n"
7694 "lrwxrwxrwx. 1 mtk mtk 0 Jan 14 01:20 net -E<gt> net:[4026531956]\n"
7695 "lrwxrwxrwx. 1 mtk mtk 0 Jan 14 01:20 pid -E<gt> pid:[4026531836]\n"
7696 "lrwxrwxrwx. 1 mtk mtk 0 Jan 14 01:20 user -E<gt> user:[4026531837]\n"
7697 "lrwxrwxrwx. 1 mtk mtk 0 Jan 14 01:20 uts -E<gt> uts:[4026531838]\n"
7701 #: build/C/man7/namespaces.7:138
7703 "Bind mounting (see B<mount>(2)) one of the files in this directory to "
7704 "somewhere else in the filesystem keeps the corresponding namespace of the "
7705 "process specified by I<pid> alive even if all processes currently in the "
7706 "namespace terminate."
7710 #: build/C/man7/namespaces.7:149
7712 "Opening one of the files in this directory (or a file that is bind mounted "
7713 "to one of these files) returns a file handle for the corresponding "
7714 "namespace of the process specified by I<pid>. As long as this file "
7715 "descriptor remains open, the namespace will remain alive, even if all "
7716 "processes in the namespace terminate. The file descriptor can be passed to "
7721 #: build/C/man7/namespaces.7:160
7723 "In Linux 3.7 and earlier, these files were visible as hard links. Since "
7724 "Linux 3.8, they appear as symbolic links. If two processes are in the same "
7725 "namespace, then the inode numbers of their I</proc/[pid]/ns/xxx> symbolic "
7726 "links will be the same; an application can check this using the "
7727 "I<stat.st_ino> field returned by B<stat>(2). The content of this symbolic "
7728 "link is a string containing the namespace type and inode number as in the "
7729 "following example:"
7733 #: build/C/man7/namespaces.7:165
7736 "$ B<readlink /proc/$$/ns/uts>\n"
7737 "uts:[4026531838]\n"
7741 #: build/C/man7/namespaces.7:169
7742 msgid "The files in this subdirectory are as follows:"
7746 #: build/C/man7/namespaces.7:169
7748 msgid "I</proc/[pid]/ns/ipc> (since Linux 3.0)"
7752 #: build/C/man7/namespaces.7:172
7753 msgid "This file is a handle for the IPC namespace of the process."
7757 #: build/C/man7/namespaces.7:172
7759 msgid "I</proc/[pid]/ns/mnt> (since Linux 3.8)"
7763 #: build/C/man7/namespaces.7:175
7764 msgid "This file is a handle for the mount namespace of the process."
7768 #: build/C/man7/namespaces.7:175
7770 msgid "I</proc/[pid]/ns/net> (since Linux 3.0)"
7774 #: build/C/man7/namespaces.7:178
7775 msgid "This file is a handle for the network namespace of the process."
7779 #: build/C/man7/namespaces.7:178
7781 msgid "I</proc/[pid]/ns/pid> (since Linux 3.8)"
7785 #: build/C/man7/namespaces.7:181
7786 msgid "This file is a handle for the PID namespace of the process."
7790 #: build/C/man7/namespaces.7:181
7792 msgid "I</proc/[pid]/ns/user> (since Linux 3.8)"
7796 #: build/C/man7/namespaces.7:184
7797 msgid "This file is a handle for the user namespace of the process."
7801 #: build/C/man7/namespaces.7:184
7803 msgid "I</proc/[pid]/ns/uts> (since Linux 3.0)"
7807 #. ==================== IPC namespaces ====================
7809 #: build/C/man7/namespaces.7:190
7810 msgid "This file is a handle for the UTS namespace of the process."
7814 #: build/C/man7/namespaces.7:190
7816 msgid "IPC namespaces (CLONE_NEWIPC)"
7819 #. commit 7eafd7c74c3f2e67c27621b987b28397110d643f
7820 #. https://lwn.net/Articles/312232/
7822 #: build/C/man7/namespaces.7:202
7824 "IPC namespaces isolate certain IPC resources, namely, System V IPC objects "
7825 "(see B<svipc>(7)) and (since Linux 2.6.30) POSIX message queues (see "
7826 "B<mq_overview>(7)). The common characteristic of these IPC mechanisms is "
7827 "that IPC objects are identified by mechanisms other than filesystem "
7832 #: build/C/man7/namespaces.7:208
7834 "Each IPC namespace has its own set of System V IPC identifiers and its own "
7835 "POSIX message queue filesystem. Objects created in an IPC namespace are "
7836 "visible to all other processes that are members of that namespace, but are "
7837 "not visible to processes in other IPC namespaces."
7841 #: build/C/man7/namespaces.7:212
7842 msgid "The following I</proc> interfaces are distinct in each IPC namespace:"
7846 #: build/C/man7/namespaces.7:215
7847 msgid "The POSIX message queue interfaces in I</proc/sys/fs/mqueue>."
7851 #: build/C/man7/namespaces.7:228
7853 "The System V IPC interfaces in I</proc/sys/kernel>, namely: I<msgmax>, "
7854 "I<msgmnb>, I<msgmni>, I<sem>, I<shmall>, I<shmmax>, I<shmmni>, and "
7855 "I<shm_rmid_forced>."
7859 #: build/C/man7/namespaces.7:231
7860 msgid "The System V IPC interfaces in I</proc/sysvipc>."
7864 #: build/C/man7/namespaces.7:235
7866 "When an IPC namespace is destroyed (i.e., when the last process that is a "
7867 "member of the namespace terminates), all IPC objects in the namespace are "
7868 "automatically destroyed."
7872 #. ==================== Network namespaces ====================
7874 #: build/C/man7/namespaces.7:242
7876 "Use of IPC namespaces requires a kernel that is configured with the "
7877 "B<CONFIG_IPC_NS> option."
7881 #: build/C/man7/namespaces.7:242
7883 msgid "Network namespaces (CLONE_NEWNET)"
7886 #. FIXME Add pointer to veth(4) page when it is eventually completed
7888 #: build/C/man7/namespaces.7:257
7890 "Network namespaces provide isolation of the system resources associated with "
7891 "networking: network devices, IPv4 and IPv6 protocol stacks, IP routing "
7892 "tables, firewalls, the I</proc/net> directory, the I</sys/class/net> "
7893 "directory, port numbers (sockets), and so on. A physical network device can "
7894 "live in exactly one network namespace. A virtual network device (\"veth\") "
7895 "pair provides a pipe-like abstraction that can be used to create tunnels "
7896 "between network namespaces, and can be used to create a bridge to a physical "
7897 "network device in another namespace."
7901 #: build/C/man7/namespaces.7:262
7903 "When a network namespace is freed (i.e., when the last process in the "
7904 "namespace terminates), its physical network devices are moved back to the "
7905 "initial network namespace (not to the parent of the process)."
7909 #. ==================== Mount namespaces ====================
7911 #: build/C/man7/namespaces.7:269
7913 "Use of network namespaces requires a kernel that is configured with the "
7914 "B<CONFIG_NET_NS> option."
7918 #: build/C/man7/namespaces.7:269
7920 msgid "Mount namespaces (CLONE_NEWNS)"
7924 #: build/C/man7/namespaces.7:277
7926 "Mount namespaces isolate the set of filesystem mount points, meaning that "
7927 "processes in different mount namespaces can have different views of the "
7928 "filesystem hierarchy. The set of mounts in a mount namespace is modified "
7929 "using B<mount>(2) and B<umount>(2)."
7933 #: build/C/man7/namespaces.7:294
7935 "The I</proc/[pid]/mounts> file (present since Linux 2.4.19) lists all the "
7936 "filesystems currently mounted in the process's mount namespace. The format "
7937 "of this file is documented in B<fstab>(5). Since kernel version 2.6.15, "
7938 "this file is pollable: after opening the file for reading, a change in this "
7939 "file (i.e., a filesystem mount or unmount) causes B<select>(2) to mark the "
7940 "file descriptor as readable, and B<poll>(2) and B<epoll_wait>(2) mark the "
7941 "file as having an error condition."
7945 #: build/C/man7/namespaces.7:302
7947 "The I</proc/[pid]/mountstats> file (present since Linux 2.6.17) exports "
7948 "information (statistics, configuration information) about the mount points "
7949 "in the process's mount namespace. This file is only readable by the owner "
7950 "of the process. Lines in this file have the form:"
7954 #: build/C/man7/namespaces.7:308
7957 "device /dev/sda7 mounted on /home with fstype ext3 [statistics]\n"
7958 "( 1 ) ( 2 ) (3 ) (4)\n"
7962 #: build/C/man7/namespaces.7:312
7963 msgid "The fields in each line are:"
7967 #: build/C/man7/namespaces.7:312 build/C/man7/user_namespaces.7:371
7973 #: build/C/man7/namespaces.7:316
7975 "The name of the mounted device (or \"nodevice\" if there is no corresponding "
7980 #: build/C/man7/namespaces.7:316 build/C/man7/user_namespaces.7:375
7986 #: build/C/man7/namespaces.7:319
7987 msgid "The mount point within the filesystem tree."
7991 #: build/C/man7/namespaces.7:319 build/C/man7/user_namespaces.7:401
7997 #: build/C/man7/namespaces.7:322
7998 msgid "The filesystem type."
8002 #: build/C/man7/namespaces.7:322
8008 #: build/C/man7/namespaces.7:327
8010 "Optional statistics and configuration information. Currently (as at Linux "
8011 "2.6.26), only NFS filesystems export information via this field."
8015 #. ==================== PID namespaces ====================
8017 #: build/C/man7/namespaces.7:331
8019 msgid "PID namespaces (CLONE_NEWPID)"
8023 #. ==================== User namespaces ====================
8025 #: build/C/man7/namespaces.7:337
8026 msgid "See B<pid_namespaces>(7)."
8030 #: build/C/man7/namespaces.7:337
8032 msgid "User namespaces (CLONE_NEWUSER)"
8036 #: build/C/man7/namespaces.7:343 build/C/man7/namespaces.7:364 build/C/man7/pid_namespaces.7:356
8037 msgid "See B<user_namespaces>(7)."
8041 #: build/C/man7/namespaces.7:343
8043 msgid "UTS namespaces (CLONE_NEWUTS)"
8047 #: build/C/man7/namespaces.7:355
8049 "UTS namespaces provide isolation of two system identifiers: the hostname and "
8050 "the NIS domain name. These identifiers are set using B<sethostname>(2) and "
8051 "B<setdomainname>(2), and can be retrieved using B<uname>(2), "
8052 "B<gethostname>(2), and B<getdomainname>(2)."
8056 #: build/C/man7/namespaces.7:359
8058 "Use of UTS namespaces requires a kernel that is configured with the "
8059 "B<CONFIG_UTS_NS> option."
8063 #: build/C/man7/namespaces.7:361 build/C/man7/pid_namespaces.7:353 build/C/man7/user_namespaces.7:648
8064 msgid "Namespaces are a Linux-specific feature."
8068 #: build/C/man7/namespaces.7:376
8070 "B<nsenter>(1), B<readlink>(1), B<unshare>(1), B<clone>(2), B<setns>(2), "
8071 "B<unshare>(2), B<proc>(5), B<credentials>(7), B<capabilities>(7), "
8072 "B<pid_namespaces>(7), B<user_namespaces>(7), B<switch_root>(8)"
8076 #: build/C/man7/pid_namespaces.7:27
8078 msgid "PID_NAMESPACES"
8082 #: build/C/man7/pid_namespaces.7:27 build/C/man2/seccomp.2:27
8088 #: build/C/man7/pid_namespaces.7:30
8089 msgid "pid_namespaces - overview of Linux PID namespaces"
8093 #: build/C/man7/pid_namespaces.7:33 build/C/man7/user_namespaces.7:33
8094 msgid "For an overview of namespaces, see B<namespaces>(7)."
8098 #: build/C/man7/pid_namespaces.7:40
8100 "PID namespaces isolate the process ID number space, meaning that processes "
8101 "in different PID namespaces can have the same PID. PID namespaces allow "
8102 "containers to provide functionality such as suspending/resuming the set of "
8103 "processes in the container and migrating the container to a new host while "
8104 "the processes inside the container maintain the same PIDs."
8108 #: build/C/man7/pid_namespaces.7:48
8110 "PIDs in a new PID namespace start at 1, somewhat like a standalone system, "
8111 "and calls to B<fork>(2), B<vfork>(2), or B<clone>(2) will produce processes "
8112 "with PIDs that are unique within the namespace."
8116 #. ============================================================
8118 #: build/C/man7/pid_namespaces.7:55
8120 "Use of PID namespaces requires a kernel that is configured with the "
8121 "B<CONFIG_PID_NS> option."
8125 #: build/C/man7/pid_namespaces.7:55
8127 msgid "The namespace init process"
8131 #: build/C/man7/pid_namespaces.7:75
8133 "The first process created in a new namespace (i.e., the process created "
8134 "using B<clone>(2) with the B<CLONE_NEWPID> flag, or the first child created "
8135 "by a process after a call to B<unshare>(2) using the B<CLONE_NEWPID> flag) "
8136 "has the PID 1, and is the \"init\" process for the namespace (see "
8137 "B<init>(1)). A child process that is orphaned within the namespace will be "
8138 "reparented to this process rather than B<init>(1) (unless one of the "
8139 "ancestors of the child in the same PID namespace employed the B<prctl>(2) "
8140 "B<PR_SET_CHILD_SUBREAPER> command to mark itself as the reaper of orphaned "
8141 "descendant processes)."
8145 #: build/C/man7/pid_namespaces.7:102
8147 "If the \"init\" process of a PID namespace terminates, the kernel terminates "
8148 "all of the processes in the namespace via a B<SIGKILL> signal. This "
8149 "behavior reflects the fact that the \"init\" process is essential for the "
8150 "correct operation of a PID namespace. In this case, a subsequent B<fork>(2) "
8151 "into this PID namespace will fail with the error B<ENOMEM>; it is not "
8152 "possible to create a new process in a PID namespace whose \"init\" process "
8153 "has terminated. Such scenarios can occur when, for example, a process uses "
8154 "an open file descriptor for a I</proc/[pid]/ns/pid> file corresponding to a "
8155 "process that was in a namespace to B<setns>(2) into that namespace after "
8156 "the \"init\" process has terminated. Another possible scenario can occur "
8157 "after a call to B<unshare>(2): if the first child subsequently created by a "
8158 "B<fork>(2) terminates, then subsequent calls to B<fork>(2) will fail with "
8163 #: build/C/man7/pid_namespaces.7:108
8165 "Only signals for which the \"init\" process has established a signal handler "
8166 "can be sent to the \"init\" process by other members of the PID namespace. "
8167 "This restriction applies even to privileged processes, and prevents other "
8168 "members of the PID namespace from accidentally killing the \"init\" process."
8172 #: build/C/man7/pid_namespaces.7:128
8174 "Likewise, a process in an ancestor namespace can\\(emsubject to the usual "
8175 "permission checks described in B<kill>(2)\\(emsend signals to the \"init\" "
8176 "process of a child PID namespace only if the \"init\" process has "
8177 "established a handler for that signal. (Within the handler, the "
8178 "I<siginfo_t> I<si_pid> field described in B<sigaction>(2) will be zero.) "
8179 "B<SIGKILL> or B<SIGSTOP> are treated exceptionally: these signals are "
8180 "forcibly delivered when sent from an ancestor PID namespace. Neither of "
8181 "these signals can be caught by the \"init\" process, and so will result in "
8182 "the usual actions associated with those signals (respectively, terminating "
8183 "and stopping the process)."
8187 #. ============================================================
8189 #: build/C/man7/pid_namespaces.7:138
8191 "Starting with Linux 3.4, the B<reboot>(2) system call causes a signal to be "
8192 "sent to the namespace \"init\" process. See B<reboot>(2) for more details."
8196 #: build/C/man7/pid_namespaces.7:138
8198 msgid "Nesting PID namespaces"
8202 #: build/C/man7/pid_namespaces.7:149
8204 "PID namespaces can be nested: each PID namespace has a parent, except for "
8205 "the initial (\"root\") PID namespace. The parent of a PID namespace is the "
8206 "PID namespace of the process that created the namespace using B<clone>(2) "
8207 "or B<unshare>(2). PID namespaces thus form a tree, with all namespaces "
8208 "ultimately tracing their ancestry to the root namespace."
8212 #: build/C/man7/pid_namespaces.7:164
8214 "A process is visible to other processes in its PID namespace, and to the "
8215 "processes in each direct ancestor PID namespace going back to the root PID "
8216 "namespace. In this context, \"visible\" means that one process can be the "
8217 "target of operations by another process using system calls that specify a "
8218 "process ID. Conversely, the processes in a child PID namespace can't see "
8219 "processes in the parent and further removed ancestor namespaces. More "
8220 "succinctly: a process can see (e.g., send signals with B<kill>(2), set nice "
8221 "values with B<setpriority>(2), etc.) only processes contained in its own PID "
8222 "namespace and in descendants of that namespace."
8226 #: build/C/man7/pid_namespaces.7:176
8228 "A process has one process ID in each of the layers of the PID namespace "
8229 "hierarchy in which it is visible, and walking back through each direct ancestor "
8230 "namespace through to the root PID namespace. System calls that operate on "
8231 "process IDs always operate using the process ID that is visible in the PID "
8232 "namespace of the caller. A call to B<getpid>(2) always returns the PID "
8233 "associated with the namespace in which the process was created."
8237 #: build/C/man7/pid_namespaces.7:191
8239 "Some processes in a PID namespace may have parents that are outside of the "
8240 "namespace. For example, the parent of the initial process in the namespace "
8241 "(i.e., the B<init>(1) process with PID 1) is necessarily in another "
8242 "namespace. Likewise, the direct children of a process that uses B<setns>(2) "
8243 "to cause its children to join a PID namespace are in a different PID "
8244 "namespace from the caller of B<setns>(2). Calls to B<getppid>(2) for such "
8245 "processes return 0."
8249 #. ============================================================
8251 #: build/C/man7/pid_namespaces.7:204
8253 "While processes may freely descend into child PID namespaces (e.g., using "
8254 "B<setns>(2) with B<CLONE_NEWPID>), they may not move in the other "
8255 "direction. That is to say, processes may not enter any ancestor namespaces "
8256 "(parent, grandparent, etc.). Changing PID namespaces is a one-way "
8261 #: build/C/man7/pid_namespaces.7:204
8263 msgid "setns(2) and unshare(2) semantics"
8267 #: build/C/man7/pid_namespaces.7:220
8269 "Calls to B<setns>(2) that specify a PID namespace file descriptor and calls "
8270 "to B<unshare>(2) with the B<CLONE_NEWPID> flag cause children subsequently "
8271 "created by the caller to be placed in a different PID namespace from the "
8272 "caller. These calls do not, however, change the PID namespace of the "
8273 "calling process, because doing so would change the caller's idea of its own "
8274 "PID (as reported by B<getpid>()), which would break many applications and "
8279 #: build/C/man7/pid_namespaces.7:228
8281 "To put things another way: a process's PID namespace membership is "
8282 "determined when the process is created and cannot be changed thereafter. "
8283 "Among other things, this means that the parental relationship between "
8284 "processes mirrors the parental relationship between PID namespaces: the "
8285 "parent of a process is either in the same namespace or resides in the "
8286 "immediate parent PID namespace."
8290 #: build/C/man7/pid_namespaces.7:228
8292 msgid "Compatibility of CLONE_NEWPID with other CLONE_* flags"
8296 #: build/C/man7/pid_namespaces.7:233
8297 msgid "B<CLONE_NEWPID> can't be combined with some other B<CLONE_*> flags:"
8301 #: build/C/man7/pid_namespaces.7:241
8303 "B<CLONE_THREAD> requires being in the same PID namespace in order that the "
8304 "threads in a process can send signals to each other. Similarly, it must be "
8305 "possible to see all of the threads of a process in the B<proc>(5) "
8310 #: build/C/man7/pid_namespaces.7:252
8312 "B<CLONE_SIGHAND> requires being in the same PID namespace; otherwise the "
8313 "process ID of the process sending a signal could not be meaningfully encoded "
8314 "when a signal is sent (see the description of the I<siginfo_t> type in "
8315 "B<sigaction>(2)). A signal queue shared by processes in multiple PID "
8316 "namespaces will defeat that."
8320 #: build/C/man7/pid_namespaces.7:262
8322 "B<CLONE_VM> requires all of the threads to be in the same PID namespace, "
8323 "because, from the point of view of a core dump, if two processes share the "
8324 "same address space then they are threads and will be core dumped together. "
8325 "When a core dump is written, the PID of each thread is written into the core "
8326 "dump. Writing the process IDs could not meaningfully succeed if some of the "
8327 "process IDs were in a parent PID namespace."
8331 #: build/C/man7/pid_namespaces.7:280
8333 "To summarize: there is a technical requirement for each of B<CLONE_THREAD>, "
8334 "B<CLONE_SIGHAND>, and B<CLONE_VM> to share a PID namespace. (Note "
8335 "furthermore that B<clone>(2) requires B<CLONE_VM> to be specified if "
8336 "B<CLONE_THREAD> or B<CLONE_SIGHAND> is specified.) Thus, call sequences "
8337 "such as the following will fail (with the error B<EINVAL>):"
8341 #: build/C/man7/pid_namespaces.7:284
8344 " unshare(CLONE_NEWPID);\n"
8345 " clone(..., CLONE_VM, ...); /* Fails */\n"
8349 #: build/C/man7/pid_namespaces.7:287
8352 " setns(fd, CLONE_NEWPID);\n"
8353 " clone(..., CLONE_VM, ...); /* Fails */\n"
8357 #: build/C/man7/pid_namespaces.7:290
8360 " clone(..., CLONE_VM, ...);\n"
8361 " setns(fd, CLONE_NEWPID); /* Fails */\n"
8365 #: build/C/man7/pid_namespaces.7:293
8368 " clone(..., CLONE_VM, ...);\n"
8369 " unshare(CLONE_NEWPID); /* Fails */\n"
8373 #. ============================================================
8375 #: build/C/man7/pid_namespaces.7:297
8377 msgid "/proc and PID namespaces"
8381 #: build/C/man7/pid_namespaces.7:306
8383 "A I</proc> filesystem shows (in the I</proc/PID> directories) only processes "
8384 "visible in the PID namespace of the process that performed the mount, even "
8385 "if the I</proc> filesystem is viewed from processes in other namespaces."
8389 #: build/C/man7/pid_namespaces.7:325
8391 "After creating a new PID namespace, it is useful for the child to change its "
8392 "root directory and mount a new procfs instance at I</proc> so that tools "
8393 "such as B<ps>(1) work correctly. If a new mount namespace is "
8394 "simultaneously created by including B<CLONE_NEWNS> in the I<flags> argument "
8395 "of B<clone>(2) or B<unshare>(2), then it isn't necessary to change the root "
8396 "directory: a new procfs instance can be mounted directly over I</proc>."
8400 #: build/C/man7/pid_namespaces.7:329
8401 msgid "From a shell, the command to mount I</proc> is:"
8405 #: build/C/man7/pid_namespaces.7:331
8407 msgid " $ mount -t proc proc /proc\n"
8411 #. ============================================================
8413 #: build/C/man7/pid_namespaces.7:343
8415 "Calling B<readlink>(2) on the path I</proc/self> yields the process ID of "
8416 "the caller in the PID namespace of the procfs mount (i.e., the PID namespace "
8417 "of the process that mounted the procfs). This can be useful for "
8418 "introspection purposes, when a process wants to discover its PID in other "
8423 #: build/C/man7/pid_namespaces.7:343 build/C/man7/user_namespaces.7:635
8425 msgid "Miscellaneous"
8429 #: build/C/man7/pid_namespaces.7:351
8431 "When a process ID is passed over a UNIX domain socket to a process in a "
8432 "different PID namespace (see the description of B<SCM_CREDENTIALS> in "
8433 "B<unix>(7)), it is translated into the corresponding PID value in the "
8434 "receiving process's PID namespace."
8438 #: build/C/man7/pid_namespaces.7:364
8440 "B<clone>(2), B<setns>(2), B<unshare>(2), B<proc>(5), B<credentials>(7), "
8441 "B<capabilities>(7), B<user_namespaces>(7), B<switch_root>(8)"
8445 #: build/C/man2/seteuid.2:29
8451 #: build/C/man2/seteuid.2:32
8452 msgid "seteuid, setegid - set effective user or group ID"
8456 #: build/C/man2/seteuid.2:38
8457 msgid "B<int seteuid(uid_t >I<euid>B<);>"
8461 #: build/C/man2/seteuid.2:40
8462 msgid "B<int setegid(gid_t >I<egid>B<);>"
8466 #: build/C/man2/seteuid.2:49
8467 msgid "B<seteuid>(), B<setegid>():"
8471 #: build/C/man2/seteuid.2:51
8473 "_BSD_SOURCE || _POSIX_C_SOURCE\\ E<gt>=\\ 200112L || _XOPEN_SOURCE\\ "
8478 #: build/C/man2/seteuid.2:58
8480 "B<seteuid>() sets the effective user ID of the calling process. "
8481 "Unprivileged user processes may only set the effective user ID to the real "
8482 "user ID, the effective user ID or the saved set-user-ID."
8487 #. equals \-1, nothing is changed.
8488 #. (This is an artifact of the implementation in glibc of seteuid()
8489 #. using setresuid(2).)
8491 #: build/C/man2/seteuid.2:67
8493 "Precisely the same holds for B<setegid>() with \"group\" instead of "
8498 #: build/C/man2/seteuid.2:79
8500 "I<Note>: there are cases where B<seteuid>() can fail even when the caller "
8501 "is UID 0; it is a grave security error to omit checking for a failure return "
8502 "from B<seteuid>()."
8506 #: build/C/man2/seteuid.2:83
8507 msgid "The target user or group ID is not valid in this user namespace."
8511 #: build/C/man2/seteuid.2:99
8513 "The calling process is not privileged (Linux: does not have the "
8514 "B<CAP_SETUID> capability in the case of B<seteuid>(), or the B<CAP_SETGID> "
8515 "capability in the case of B<setegid>()) and I<euid> (respectively, I<egid>) "
8516 "is not the real user (group) ID, the effective user (group) ID, or the saved "
8517 "set-user-ID (saved set-group-ID)."
8521 #: build/C/man2/seteuid.2:101
8522 msgid "4.3BSD, POSIX.1-2001."
8526 #: build/C/man2/seteuid.2:107
8528 "Setting the effective user (group) ID to the saved set-user-ID (saved "
8529 "set-group-ID) is possible since Linux 1.1.37 (1.1.38). On an arbitrary "
8530 "system one should check B<_POSIX_SAVED_IDS>."
8534 #: build/C/man2/seteuid.2:123
8536 "Under glibc 2.0 B<seteuid(>I<euid>B<)> is equivalent to B<setreuid(-1,>I< "
8537 "euid>B<)> and hence may change the saved set-user-ID. Under glibc 2.1 and "
8538 "later it is equivalent to B<setresuid(-1,>I< euid>B<, -1)> and hence does "
8539 "not change the saved set-user-ID. Analogous remarks hold for B<setegid>(), "
8540 "with the difference that the change in implementation from B<setregid(-1,>I< "
8541 "egid>B<)> to B<setresgid(-1,>I< egid>B<, -1)> occurred in glibc 2.2 or 2.3 "
8542 "(depending on the hardware architecture)."
8546 #: build/C/man2/seteuid.2:132
8548 "According to POSIX.1, B<seteuid>() (B<setegid>()) need not permit I<euid> "
8549 "(I<egid>) to be the same value as the current effective user (group) ID, "
8550 "and some implementations do not permit this."
8554 #: build/C/man2/seteuid.2:141
8556 "On Linux, B<seteuid>() and B<setegid>() are implemented as library "
8557 "functions that call, respectively, B<setresuid>(2) and B<setresgid>(2)."
8561 #: build/C/man2/seteuid.2:148
8563 "B<geteuid>(2), B<setresuid>(2), B<setreuid>(2), B<setuid>(2), "
8564 "B<capabilities>(7), B<credentials>(7), B<user_namespaces>(7)"
8568 #: build/C/man2/setfsgid.2:31
8574 #: build/C/man2/setfsgid.2:31 build/C/man2/setfsuid.2:31
8580 #: build/C/man2/setfsgid.2:34
8581 msgid "setfsgid - set group identity used for filesystem checks"
8585 #: build/C/man2/setfsgid.2:36 build/C/man2/setfsuid.2:36
8586 msgid "B<#include E<lt>sys/fsuid.hE<gt>>"
8590 #: build/C/man2/setfsgid.2:38
8591 msgid "B<int setfsgid(gid_t >I<fsgid>B<);>"
8595 #: build/C/man2/setfsgid.2:51
8597 "The system call B<setfsgid>() changes the value of the caller's filesystem "
8598 "group ID\\(emthe group ID that the Linux kernel uses to check for all "
8599 "accesses to the filesystem. Normally, the value of the filesystem group ID "
8600 "will shadow the value of the effective group ID. In fact, whenever the "
8601 "effective group ID is changed, the filesystem group ID will also be changed "
8602 "to the new value of the effective group ID."
8606 #: build/C/man2/setfsgid.2:62
8608 "Explicit calls to B<setfsuid>(2) and B<setfsgid>() are usually used only "
8609 "by programs such as the Linux NFS server that need to change what user and "
8610 "group ID is used for file access without a corresponding change in the real "
8611 "and effective user and group IDs. A change in the normal user IDs for a "
8612 "program such as the NFS server is a security hole that can expose it to "
8613 "unwanted signals. (But see below.)"
8617 #: build/C/man2/setfsgid.2:68
8619 "B<setfsgid>() will succeed only if the caller is the superuser or if "
8620 "I<fsgid> matches either the caller's real group ID, effective group ID, "
8621 "saved set-group-ID, or current filesystem group ID."
8625 #: build/C/man2/setfsgid.2:71
8627 "On both success and failure, this call returns the previous filesystem group "
8631 #. This system call is present since Linux 1.1.44
8632 #. and in libc since libc 4.7.6.
8634 #: build/C/man2/setfsgid.2:75 build/C/man2/setfsuid.2:75
8635 msgid "This system call is present in Linux since version 1.2."
8639 #: build/C/man2/setfsgid.2:79
8641 "B<setfsgid>() is Linux-specific and should not be used in programs intended "
8646 #: build/C/man2/setfsgid.2:85
8648 "When glibc determines that the argument is not a valid group ID, it will "
8649 "return -1 and set I<errno> to B<EINVAL> without attempting the system call."
8653 #: build/C/man2/setfsgid.2:96
8655 "Note that at the time this system call was introduced, a process could send "
8656 "a signal to a process with the same effective user ID. Today signal "
8657 "permission handling is slightly different. See B<setfsuid>(2) for a "
8658 "discussion of why the use of both B<setfsuid>(2) and B<setfsgid>() is "
8659 "nowadays unneeded."
8663 #: build/C/man2/setfsgid.2:106
8665 "The original Linux B<setfsgid>() system call supported only 16-bit group "
8666 "IDs. Subsequently, Linux 2.4 added B<setfsgid32>() supporting 32-bit IDs. "
8667 "The glibc B<setfsgid>() wrapper function transparently deals with the "
8668 "variation across kernel versions."
8672 #: build/C/man2/setfsgid.2:123
8674 "No error indications of any kind are returned to the caller, and the fact "
8675 "that both successful and unsuccessful calls return the same value makes it "
8676 "impossible to directly determine whether the call succeeded or failed. "
8677 "Instead, the caller must resort to looking at the return value from a "
8678 "further call such as I<setfsgid(-1)> (which will always fail), in order to "
8679 "determine if a preceding call to B<setfsgid>() changed the filesystem group "
8680 "ID. At the very least, B<EPERM> should be returned when the call fails "
8681 "(because the caller lacks the B<CAP_SETGID> capability)."
8685 #: build/C/man2/setfsgid.2:127
8686 msgid "B<kill>(2), B<setfsuid>(2), B<capabilities>(7), B<credentials>(7)"
8690 #: build/C/man2/setfsuid.2:31
8696 #: build/C/man2/setfsuid.2:34
8697 msgid "setfsuid - set user identity used for filesystem checks"
8701 #: build/C/man2/setfsuid.2:38
8702 msgid "B<int setfsuid(uid_t >I<fsuid>B<);>"
8706 #: build/C/man2/setfsuid.2:51
8708 "The system call B<setfsuid>() changes the value of the caller's filesystem "
8709 "user ID\\(emthe user ID that the Linux kernel uses to check for all accesses "
8710 "to the filesystem. Normally, the value of the filesystem user ID will "
8711 "shadow the value of the effective user ID. In fact, whenever the effective "
8712 "user ID is changed, the filesystem user ID will also be changed to the new "
8713 "value of the effective user ID."
8717 #: build/C/man2/setfsuid.2:62
8719 "Explicit calls to B<setfsuid>() and B<setfsgid>(2) are usually used only "
8720 "by programs such as the Linux NFS server that need to change what user and "
8721 "group ID is used for file access without a corresponding change in the real "
8722 "and effective user and group IDs. A change in the normal user IDs for a "
8723 "program such as the NFS server is a security hole that can expose it to "
8724 "unwanted signals. (But see below.)"
8728 #: build/C/man2/setfsuid.2:68
8730 "B<setfsuid>() will succeed only if the caller is the superuser or if "
8731 "I<fsuid> matches either the caller's real user ID, effective user ID, saved "
8732 "set-user-ID, or current filesystem user ID."
8736 #: build/C/man2/setfsuid.2:71
8738 "On both success and failure, this call returns the previous filesystem user "
8743 #: build/C/man2/setfsuid.2:79
8745 "B<setfsuid>() is Linux-specific and should not be used in programs intended "
8750 #: build/C/man2/setfsuid.2:85
8752 "When glibc determines that the argument is not a valid user ID, it will "
8753 "return -1 and set I<errno> to B<EINVAL> without attempting the system call."
8757 #: build/C/man2/setfsuid.2:104
8759 "At the time when this system call was introduced, one process could send a "
8760 "signal to another process with the same effective user ID. This meant that "
8761 "if a privileged process changed its effective user ID for the purpose of "
8762 "file permission checking, then it could become vulnerable to receiving "
8763 "signals sent by another (unprivileged) process with the same user ID. The "
8764 "filesystem user ID attribute was thus added to allow a process to change its "
8765 "user ID for the purposes of file permission checking without at the same "
8766 "time becoming vulnerable to receiving unwanted signals. Since Linux 2.0, "
8767 "signal permission handling is different (see B<kill>(2)), with the result "
8768 "that a process can change its effective user ID without being "
8769 "vulnerable to receiving signals from unwanted processes. Thus, "
8770 "B<setfsuid>() is nowadays unneeded and should be avoided in new "
8771 "applications (likewise for B<setfsgid>(2))."
8775 #: build/C/man2/setfsuid.2:114
8777 "The original Linux B<setfsuid>() system call supported only 16-bit user "
8778 "IDs. Subsequently, Linux 2.4 added B<setfsuid32>() supporting 32-bit IDs. "
8779 "The glibc B<setfsuid>() wrapper function transparently deals with the "
8780 "variation across kernel versions."
8784 #: build/C/man2/setfsuid.2:131
8786 "No error indications of any kind are returned to the caller, and the fact "
8787 "that both successful and unsuccessful calls return the same value makes it "
8788 "impossible to directly determine whether the call succeeded or failed. "
8789 "Instead, the caller must resort to looking at the return value from a "
8790 "further call such as I<setfsuid(-1)> (which will always fail), in order to "
8791 "determine if a preceding call to B<setfsuid>() changed the filesystem user "
8792 "ID. At the very least, B<EPERM> should be returned when the call fails "
8793 "(because the caller lacks the B<CAP_SETUID> capability)."
8797 #: build/C/man2/setfsuid.2:135
8798 msgid "B<kill>(2), B<setfsgid>(2), B<capabilities>(7), B<credentials>(7)"
8802 #: build/C/man2/setgid.2:29
8808 #: build/C/man2/setgid.2:32
8809 msgid "setgid - set group identity"
8813 #: build/C/man2/setgid.2:38
8814 msgid "B<int setgid(gid_t >I<gid>B<);>"
8818 #: build/C/man2/setgid.2:43
8820 "B<setgid>() sets the effective group ID of the calling process. If the "
8821 "caller is the superuser, the real GID and saved set-group-ID are also set."
8825 #: build/C/man2/setgid.2:53
8827 "Under Linux, B<setgid>() is implemented like the POSIX version with the "
8828 "B<_POSIX_SAVED_IDS> feature. This allows a set-group-ID program that is not "
8829 "set-user-ID-root to drop all of its group privileges, do some unprivileged "
8830 "work, and then reengage the original effective group ID in a secure manner."
8834 #: build/C/man2/setgid.2:64
8835 msgid "The group ID specified in I<gid> is not valid in this user namespace."
8839 #: build/C/man2/setgid.2:71
8841 "The calling process is not privileged (does not have the B<CAP_SETGID> "
8842 "capability), and I<gid> does not match the real group ID or saved "
8843 "set-group-ID of the calling process."
8847 #: build/C/man2/setgid.2:83
8849 "The original Linux B<setgid>() system call supported only 16-bit group "
8850 "IDs. Subsequently, Linux 2.4 added B<setgid32>() supporting 32-bit IDs. "
8851 "The glibc B<setgid>() wrapper function transparently deals with the "
8852 "variation across kernel versions."
8856 #: build/C/man2/setgid.2:89
8858 "B<getgid>(2), B<setegid>(2), B<setregid>(2), B<capabilities>(7), "
8859 "B<credentials>(7), B<user_namespaces>(7)"
8863 #: build/C/man2/setpgid.2:48
8869 #: build/C/man2/setpgid.2:48
8875 #: build/C/man2/setpgid.2:51
8876 msgid "setpgid, getpgid, setpgrp, getpgrp - set/get process group"
8880 #: build/C/man2/setpgid.2:55
8881 msgid "B<int setpgid(pid_t >I<pid>B<, pid_t >I<pgid>B<);>"
8885 #: build/C/man2/setpgid.2:57
8886 msgid "B<pid_t getpgid(pid_t >I<pid>B<);>"
8890 #: build/C/man2/setpgid.2:59
8891 msgid "B<pid_t getpgrp(void);> /* POSIX.1 version */"
8895 #: build/C/man2/setpgid.2:62
8897 "B<pid_t getpgrp(pid_t >I<pid>B<);\\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ > /* BSD "
8902 #: build/C/man2/setpgid.2:64
8903 msgid "B<int setpgrp(void);> /* System V version */"
8907 #: build/C/man2/setpgid.2:67
8908 msgid "B<int setpgrp(pid_t >I<pid>B<, pid_t >I<pgid>B<);\\ > /* BSD version */"
8912 #: build/C/man2/setpgid.2:76
8913 msgid "B<getpgid>():"
8917 #: build/C/man2/setpgid.2:84
8918 msgid "B<setpgrp>() (POSIX.1):"
8922 #: build/C/man2/setpgid.2:87
8925 " _SVID_SOURCE || _XOPEN_SOURCE\\ E<gt>=\\ 500 ||\n"
8926 " _XOPEN_SOURCE\\ &&\\ _XOPEN_SOURCE_EXTENDED\n"
8930 #: build/C/man2/setpgid.2:89
8932 msgid " || /* Since glibc 2.19: */ _BSD_SOURCE\n"
8936 #: build/C/man2/setpgid.2:93
8937 msgid "B<setpgrp>()\\ (BSD), B<getpgrp>()\\ (BSD) [before glibc 2.19]:"
8941 #: build/C/man2/setpgid.2:97
8945 " !\\ (_POSIX_SOURCE || _POSIX_C_SOURCE || _XOPEN_SOURCE ||\n"
8946 " _XOPEN_SOURCE_EXTENDED || _GNU_SOURCE || _SVID_SOURCE)\n"
8950 #: build/C/man2/setpgid.2:109
8952 "All of these interfaces are available on Linux, and are used for getting and "
8953 "setting the process group ID (PGID) of a process. The preferred, "
8954 "POSIX.1-specified ways of doing this are: B<getpgrp>(void), for retrieving "
8955 "the calling process's PGID; and B<setpgid>(), for setting a process's PGID."
8959 #: build/C/man2/setpgid.2:134
8961 "B<setpgid>() sets the PGID of the process specified by I<pid> to I<pgid>. "
8962 "If I<pid> is zero, then the process ID of the calling process is used. If "
8963 "I<pgid> is zero, then the PGID of the process specified by I<pid> is made "
8964 "the same as its process ID. If B<setpgid>() is used to move a process from "
8965 "one process group to another (as is done by some shells when creating "
8966 "pipelines), both process groups must be part of the same session (see "
8967 "B<setsid>(2) and B<credentials>(7)). In this case, the I<pgid> specifies "
8968 "an existing process group to be joined and the session ID of that group must "
8969 "match the session ID of the joining process."
8973 #: build/C/man2/setpgid.2:139
8975 "The POSIX.1 version of B<getpgrp>(), which takes no arguments, returns the "
8976 "PGID of the calling process."
8980 #: build/C/man2/setpgid.2:150
8982 "B<getpgid>() returns the PGID of the process specified by I<pid>. If "
8983 "I<pid> is zero, the process ID of the calling process is used. (Retrieving "
8984 "the PGID of a process other than the caller is rarely necessary, and the "
8985 "POSIX.1 B<getpgrp>() is preferred for that task.)"
8989 #: build/C/man2/setpgid.2:155
8991 "The System\\ V-style B<setpgrp>(), which takes no arguments, is equivalent "
8992 "to I<setpgid(0,\\ 0)>."
8996 #: build/C/man2/setpgid.2:163
8998 "The BSD-specific B<setpgrp>() call, which takes arguments I<pid> and "
8999 "I<pgid>, is a wrapper function that calls"
9003 #: build/C/man2/setpgid.2:165
9005 msgid " setpgid(pid, pgid)\n"
9008 #. The true BSD setpgrp() system call differs in allowing the PGID
9009 #. to be set to arbitrary values, rather than being restricted to
9010 #. PGIDs in the same session.
9012 #: build/C/man2/setpgid.2:176
9014 "Since glibc 2.19, the BSD-specific B<setpgrp>() function is no longer "
9015 "exposed by I<E<lt>unistd.hE<gt>>; calls should be replaced with the "
9016 "B<setpgid>() call shown above."
9020 #: build/C/man2/setpgid.2:182
9022 "The BSD-specific B<getpgrp>() call, which takes a single I<pid> argument, "
9023 "is a wrapper function that calls"
9027 #: build/C/man2/setpgid.2:184
9029 msgid " getpgid(pid)\n"
9033 #: build/C/man2/setpgid.2:195
9035 "Since glibc 2.19, the BSD-specific B<getpgrp>() function is no longer "
9036 "exposed by I<E<lt>unistd.hE<gt>>; calls should be replaced with calls to the "
9037 "POSIX.1 B<getpgrp>() which takes no arguments (if the intent is to obtain "
9038 "the caller's PGID), or with the B<getpgid>() call shown above."
9042 #: build/C/man2/setpgid.2:204
9044 "On success, B<setpgid>() and B<setpgrp>() return zero. On error, -1 is "
9045 "returned, and I<errno> is set appropriately."
9049 #: build/C/man2/setpgid.2:208
9050 msgid "The POSIX.1 B<getpgrp>() always returns the PGID of the caller."
9054 #: build/C/man2/setpgid.2:216
9056 "B<getpgid>(), and the BSD-specific B<getpgrp>() return a process group on "
9057 "success. On error, -1 is returned, and I<errno> is set appropriately."
9061 #: build/C/man2/setpgid.2:225
9063 "An attempt was made to change the process group ID of one of the children of "
9064 "the calling process and the child had already performed an B<execve>(2) "
9065 "(B<setpgid>(), B<setpgrp>())."
9069 #: build/C/man2/setpgid.2:231
9070 msgid "I<pgid> is less than 0 (B<setpgid>(), B<setpgrp>())."
9074 #: build/C/man2/setpgid.2:240
9076 "An attempt was made to move a process into a process group in a different "
9077 "session, or to change the process group ID of one of the children of the "
9078 "calling process and the child was in a different session, or to change the "
9079 "process group ID of a session leader (B<setpgid>(), B<setpgrp>())."
9083 #: build/C/man2/setpgid.2:250
9085 "For B<getpgid>(): I<pid> does not match any process. For B<setpgid>(): "
9086 "I<pid> is not the calling process and not a child of the calling process."
9090 #: build/C/man2/setpgid.2:256
9092 "B<setpgid>() and the version of B<getpgrp>() with no arguments conform to "
9097 #: build/C/man2/setpgid.2:265
9099 "POSIX.1-2001 also specifies B<getpgid>() and the version of B<setpgrp>() "
9100 "that takes no arguments. (POSIX.1-2008 marks this B<setpgrp>() "
9101 "specification as obsolete.)"
9105 #: build/C/man2/setpgid.2:272
9107 "The version of B<getpgrp>() with one argument and the version of "
9108 "B<setpgrp>() that takes two arguments derive from 4.2BSD, and are not "
9109 "specified by POSIX.1."
9113 #: build/C/man2/setpgid.2:278
9115 "A child created via B<fork>(2) inherits its parent's process group ID. The "
9116 "PGID is preserved across an B<execve>(2)."
9120 #: build/C/man2/setpgid.2:281
9122 "Each process group is a member of a session and each process is a member of "
9123 "the session of which its process group is a member."
9127 #: build/C/man2/setpgid.2:308
9129 "A session can have a controlling terminal. At any time, one (and only one) "
9130 "of the process groups in the session can be the foreground process group for "
9131 "the terminal; the remaining process groups are in the background. If a "
9132 "signal is generated from the terminal (e.g., typing the interrupt key to "
9133 "generate B<SIGINT>), that signal is sent to the foreground process group. "
9134 "(See B<termios>(3) for a description of the characters that generate "
9135 "signals.) Only the foreground process group may B<read>(2) from the "
9136 "terminal; if a background process group tries to B<read>(2) from the "
9137 "terminal, then the group is sent a B<SIGTTIN> signal, which suspends it. "
9138 "The B<tcgetpgrp>(3) and B<tcsetpgrp>(3) functions are used to get/set the "
9139 "foreground process group of the controlling terminal."
9143 #: build/C/man2/setpgid.2:316
9145 "The B<setpgid>() and B<getpgrp>() calls are used by programs such as "
9146 "B<bash>(1) to create process groups in order to implement shell job "
9151 #: build/C/man2/setpgid.2:326
9153 "If a session has a controlling terminal, and the B<CLOCAL> flag for that "
9154 "terminal is not set, and a terminal hangup occurs, then the session leader "
9155 "is sent a B<SIGHUP>. If the session leader exits, then a B<SIGHUP> signal "
9156 "will also be sent to each process in the foreground process group of the "
9157 "controlling terminal."
9160 #. exit.3 refers to the following text:
9162 #: build/C/man2/setpgid.2:340
9164 "If the exit of the process causes a process group to become orphaned, and if "
9165 "any member of the newly orphaned process group is stopped, then a B<SIGHUP> "
9166 "signal followed by a B<SIGCONT> signal will be sent to each process in the "
9167 "newly orphaned process group. An orphaned process group is one in which the "
9168 "parent of every member of the process group is either itself also a member of "
9169 "the process group or is a member of a process group in a different session "
9170 "(see also B<credentials>(7))."
9174 #: build/C/man2/setpgid.2:346
9176 "B<getuid>(2), B<setsid>(2), B<tcgetpgrp>(3), B<tcsetpgrp>(3), B<termios>(3), "
9181 #: build/C/man2/setresuid.2:26
9187 #: build/C/man2/setresuid.2:29
9188 msgid "setresuid, setresgid - set real, effective and saved user or group ID"
9192 #: build/C/man2/setresuid.2:35
9193 msgid "B<int setresuid(uid_t >I<ruid>B<, uid_t >I<euid>B<, uid_t >I<suid>B<);>"
9197 #: build/C/man2/setresuid.2:37
9198 msgid "B<int setresgid(gid_t >I<rgid>B<, gid_t >I<egid>B<, gid_t >I<sgid>B<);>"
9202 #: build/C/man2/setresuid.2:41
9204 "B<setresuid>() sets the real user ID, the effective user ID, and the saved "
9205 "set-user-ID of the calling process."
9209 #: build/C/man2/setresuid.2:47
9211 "Unprivileged user processes may change the real UID, effective UID, and "
9212 "saved set-user-ID, each to one of: the current real UID, the current "
9213 "effective UID or the current saved set-user-ID."
9217 #: build/C/man2/setresuid.2:51
9219 "Privileged processes (on Linux, those having the B<CAP_SETUID> capability) "
9220 "may set the real UID, effective UID, and saved set-user-ID to arbitrary "
9225 #: build/C/man2/setresuid.2:53
9226 msgid "If one of the arguments equals -1, the corresponding value is not changed."
9230 #: build/C/man2/setresuid.2:57
9232 "Regardless of what changes are made to the real UID, effective UID, and "
9233 "saved set-user-ID, the filesystem UID is always set to the same value as the "
9234 "(possibly new) effective UID."
9238 #: build/C/man2/setresuid.2:64
9240 "Completely analogously, B<setresgid>() sets the real GID, effective GID, "
9241 "and saved set-group-ID of the calling process (and always modifies the "
9242 "filesystem GID to be the same as the effective GID), with the same "
9243 "restrictions for unprivileged processes."
9247 #: build/C/man2/setresuid.2:76
9249 "I<Note>: there are cases where B<setresuid>() can fail even when the caller "
9250 "is UID 0; it is a grave security error to omit checking for a failure return "
9251 "from B<setresuid>()."
9255 #: build/C/man2/setresuid.2:77 build/C/man2/setresuid.2:84 build/C/man2/setreuid.2:106 build/C/man2/setreuid.2:113 build/C/man2/setuid.2:83 build/C/man2/setuid.2:90
9261 #: build/C/man2/setresuid.2:84 build/C/man2/setreuid.2:113
9263 "The call would change the caller's real UID (i.e., I<ruid> does not match "
9264 "the caller's real UID), but there was a temporary failure allocating the "
9265 "necessary kernel data structures."
9269 #: build/C/man2/setresuid.2:99 build/C/man2/setreuid.2:128
9271 "I<ruid> does not match the caller's real UID and this call would bring the "
9272 "number of processes belonging to the real user ID I<ruid> over the caller's "
9273 "B<RLIMIT_NPROC> resource limit. Since Linux 3.1, this error case no longer "
9274 "occurs (but robust applications should check for this error); see the "
9275 "description of B<EAGAIN> in B<execve>(2)."
9279 #: build/C/man2/setresuid.2:103 build/C/man2/setreuid.2:132
9281 "One or more of the target user or group IDs is not valid in this user "
9286 #: build/C/man2/setresuid.2:107
9288 "The calling process is not privileged (did not have the B<CAP_SETUID> "
9289 "capability) and tried to change the IDs to values that are not permitted."
9293 #: build/C/man2/setresuid.2:109
9294 msgid "These calls are available under Linux since Linux 2.1.44."
9298 #: build/C/man2/setresuid.2:116
9300 "Under HP-UX and FreeBSD, the prototype is found in I<E<lt>unistd.hE<gt>>. "
9301 "Under Linux, the prototype is provided by glibc since version 2.3.2."
9305 #: build/C/man2/setresuid.2:132
9307 "The original Linux B<setresuid>() and B<setresgid>() system calls "
9308 "supported only 16-bit user and group IDs. Subsequently, Linux 2.4 added "
9309 "B<setresuid32>() and B<setresgid32>(), supporting 32-bit IDs. The glibc "
9310 "B<setresuid>() and B<setresgid>() wrapper functions transparently deal "
9311 "with the variations across kernel versions."
9315 #: build/C/man2/setresuid.2:141
9317 "B<getresuid>(2), B<getuid>(2), B<setfsgid>(2), B<setfsuid>(2), "
9318 "B<setreuid>(2), B<setuid>(2), B<capabilities>(7), B<credentials>(7), "
9319 "B<user_namespaces>(7)"
9323 #: build/C/man2/setreuid.2:45
9329 #: build/C/man2/setreuid.2:48
9330 msgid "setreuid, setregid - set real and/or effective user or group ID"
9334 #: build/C/man2/setreuid.2:54
9335 msgid "B<int setreuid(uid_t >I<ruid>B<, uid_t >I<euid>B<);>"
9339 #: build/C/man2/setreuid.2:56
9340 msgid "B<int setregid(gid_t >I<rgid>B<, gid_t >I<egid>B<);>"
9344 #: build/C/man2/setreuid.2:64
9345 msgid "B<setreuid>(), B<setregid>():"
9349 #: build/C/man2/setreuid.2:68
9351 "_BSD_SOURCE || _XOPEN_SOURCE\\ E<gt>=\\ 500 || _XOPEN_SOURCE\\ &&\\ "
9352 "_XOPEN_SOURCE_EXTENDED"
9356 #: build/C/man2/setreuid.2:73
9357 msgid "B<setreuid>() sets real and effective user IDs of the calling process."
9361 #: build/C/man2/setreuid.2:76
9363 "Supplying a value of -1 for either the real or effective user ID forces the "
9364 "system to leave that ID unchanged."
9368 #: build/C/man2/setreuid.2:79
9370 "Unprivileged processes may only set the effective user ID to the real user "
9371 "ID, the effective user ID, or the saved set-user-ID."
9375 #: build/C/man2/setreuid.2:82
9377 "Unprivileged users may only set the real user ID to the real user ID or the "
9378 "effective user ID."
9382 #: build/C/man2/setreuid.2:88
9384 "If the real user ID is set (i.e., I<ruid> is not -1) or the effective user "
9385 "ID is set to a value not equal to the previous real user ID, the saved "
9386 "set-user-ID will be set to the new effective user ID."
9390 #: build/C/man2/setreuid.2:93
9392 "Completely analogously, B<setregid>() sets real and effective group ID's of "
9393 "the calling process, and all of the above holds with \"group\" instead of "
9398 #: build/C/man2/setreuid.2:105
9400 "I<Note>: there are cases where B<setreuid>() can fail even when the caller "
9401 "is UID 0; it is a grave security error to omit checking for a failure return "
9402 "from B<setreuid>()."
9406 #: build/C/man2/setreuid.2:148
9408 "The calling process is not privileged (Linux: does not have the "
9409 "B<CAP_SETUID> capability in the case of B<setreuid>(), or the B<CAP_SETGID> "
9410 "capability in the case of B<setregid>()) and a change other than (i) "
9411 "swapping the effective user (group) ID with the real user (group) ID, or "
9412 "(ii) setting one to the value of the other or (iii) setting the effective "
9413 "user (group) ID to the value of the saved set-user-ID (saved set-group-ID) "
9418 #: build/C/man2/setreuid.2:154
9420 "POSIX.1-2001, 4.3BSD (the B<setreuid>() and B<setregid>() function calls "
9421 "first appeared in 4.2BSD)."
9425 #: build/C/man2/setreuid.2:158
9427 "Setting the effective user (group) ID to the saved set-user-ID (saved "
9428 "set-group-ID) is possible since Linux 1.1.37 (1.1.38)."
9432 #: build/C/man2/setreuid.2:175
9434 "POSIX.1 does not specify all of the possible ID changes that are permitted on "
9435 "Linux for an unprivileged process. For B<setreuid>(), the effective user ID "
9436 "can be made the same as the real user ID or the saved set-user-ID, and it is "
9437 "unspecified whether unprivileged processes may set the real user ID to the "
9438 "real user ID, the effective user ID, or the saved set-user-ID. For "
9439 "B<setregid>(), the real group ID can be changed to the value of the saved "
9440 "set-group-ID, and the effective group ID can be changed to the value of the "
9441 "real group ID or the saved set-group-ID. The precise details of what ID "
9442 "changes are permitted vary across implementations."
9446 #: build/C/man2/setreuid.2:178
9448 "POSIX.1 makes no specification about the effect of these calls on the saved "
9449 "set-user-ID and saved set-group-ID."
9453 #: build/C/man2/setreuid.2:194
9455 "The original Linux B<setreuid>() and B<setregid>() system calls supported "
9456 "only 16-bit user and group IDs. Subsequently, Linux 2.4 added "
9457 "B<setreuid32>() and B<setregid32>(), supporting 32-bit IDs. The glibc "
9458 "B<setreuid>() and B<setregid>() wrapper functions transparently deal with "
9459 "the variations across kernel versions."
9463 #: build/C/man2/setreuid.2:202
9465 "B<getgid>(2), B<getuid>(2), B<seteuid>(2), B<setgid>(2), B<setresuid>(2), "
9466 "B<setuid>(2), B<capabilities>(7), B<user_namespaces>(7)"
9470 #: build/C/man2/setsid.2:31
9476 #: build/C/man2/setsid.2:34
9477 msgid "setsid - creates a session and sets the process group ID"
9481 #: build/C/man2/setsid.2:39
9482 msgid "B<pid_t setsid(void);>"
9486 #: build/C/man2/setsid.2:50
9488 "B<setsid>() creates a new session if the calling process is not a process "
9489 "group leader. The calling process is the leader of the new session (i.e., "
9490 "its session ID is made the same as its process ID). The calling process also "
9491 "becomes the process group leader of a new process group in the session "
9492 "(i.e., its process group ID is made the same as its process ID)."
9496 #: build/C/man2/setsid.2:54
9498 "The calling process will be the only process in the new process group and in "
9499 "the new session. The new session has no controlling terminal."
9503 #: build/C/man2/setsid.2:61
9505 "On success, the (new) session ID of the calling process is returned. On "
9506 "error, I<(pid_t)\\ -1> is returned, and I<errno> is set to indicate the "
9511 #: build/C/man2/setsid.2:68
9513 "The process group ID of any process equals the PID of the calling process. "
9514 "Thus, in particular, B<setsid>() fails if the calling process is already a "
9515 "process group leader."
9519 #: build/C/man2/setsid.2:76
9521 "A child created via B<fork>(2) inherits its parent's session ID. The "
9522 "session ID is preserved across an B<execve>(2)."
9526 #: build/C/man2/setsid.2:93
9528 "A process group leader is a process whose process group ID equals its PID. "
9529 "Disallowing a process group leader from calling B<setsid>() prevents the "
9530 "possibility that a process group leader places itself in a new session while "
9531 "other processes in the process group remain in the original session; such a "
9532 "scenario would break the strict two-level hierarchy of sessions and process "
9533 "groups. In order to be sure that B<setsid>() will succeed, B<fork>(2) and "
9534 "B<_exit>(2), and have the child do B<setsid>()."
9538 #: build/C/man2/setsid.2:99
9540 "B<setsid>(1), B<getsid>(2), B<setpgid>(2), B<setpgrp>(2), B<tcgetsid>(3), "
9545 #: build/C/man2/setuid.2:30
9551 #: build/C/man2/setuid.2:33
9552 msgid "setuid - set user identity"
9556 #: build/C/man2/setuid.2:39
9557 msgid "B<int setuid(uid_t >I<uid>B<);>"
9561 #: build/C/man2/setuid.2:44
9563 "B<setuid>() sets the effective user ID of the calling process. If the "
9564 "effective UID of the caller is root, the real UID and saved set-user-ID are "
9569 #: build/C/man2/setuid.2:53
9571 "Under Linux, B<setuid>() is implemented like the POSIX version with the "
9572 "B<_POSIX_SAVED_IDS> feature. This allows a set-user-ID (other than root) "
9573 "program to drop all of its user privileges, do some un-privileged work, and "
9574 "then reengage the original effective user ID in a secure manner."
9578 #: build/C/man2/setuid.2:63
9580 "If the user is root or the program is set-user-ID-root, special care must be "
9581 "taken. The B<setuid>() function checks the effective user ID of the caller "
9582 "and if it is the superuser, all process-related user ID's are set to "
9583 "I<uid>. After this has occurred, it is impossible for the program to regain "
9588 #: build/C/man2/setuid.2:70
9590 "Thus, a set-user-ID-root program wishing to temporarily drop root "
9591 "privileges, assume the identity of an unprivileged user, and then regain "
9592 "root privileges afterward cannot use B<setuid>(). You can accomplish this "
9593 "with B<seteuid>(2)."
9597 #: build/C/man2/setuid.2:82
9599 "I<Note>: there are cases where B<setuid>() can fail even when the caller is "
9600 "UID 0; it is a grave security error to omit checking for a failure return "
9605 #: build/C/man2/setuid.2:90
9607 "The call would change the caller's real UID (i.e., I<uid> does not match the "
9608 "caller's real UID), but there was a temporary failure allocating the "
9609 "necessary kernel data structures."
9613 #: build/C/man2/setuid.2:105
9615 "I<uid> does not match the real user ID of the caller and this call would "
9616 "bring the number of processes belonging to the real user ID I<uid> over the "
9617 "caller's B<RLIMIT_NPROC> resource limit. Since Linux 3.1, this error case "
9618 "no longer occurs (but robust applications should check for this error); see "
9619 "the description of B<EAGAIN> in B<execve>(2)."
9623 #: build/C/man2/setuid.2:110
9624 msgid "The user ID specified in I<uid> is not valid in this user namespace."
9628 #: build/C/man2/setuid.2:117
9630 "The user is not privileged (Linux: does not have the B<CAP_SETUID> "
9631 "capability) and I<uid> does not match the real UID or saved set-user-ID of "
9632 "the calling process."
9635 #. SVr4 documents an additional EINVAL error condition.
9637 #: build/C/man2/setuid.2:122
9639 "SVr4, POSIX.1-2001. Not quite compatible with the 4.4BSD call, which sets "
9640 "all of the real, saved, and effective user IDs."
9644 #: build/C/man2/setuid.2:130
9646 "Linux has the concept of the filesystem user ID, normally equal to the "
9647 "effective user ID. The B<setuid>() call also sets the filesystem user ID "
9648 "of the calling process. See B<setfsuid>(2)."
9652 #: build/C/man2/setuid.2:135
9654 "If I<uid> is different from the old effective UID, the process will be "
9655 "forbidden from leaving core dumps."
9659 #: build/C/man2/setuid.2:145
9661 "The original Linux B<setuid>() system call supported only 16-bit user IDs. "
9662 "Subsequently, Linux 2.4 added B<setuid32>() supporting 32-bit IDs. The "
9663 "glibc B<setuid>() wrapper function transparently deals with the variation "
9664 "across kernel versions."
9668 #: build/C/man2/setuid.2:152
9670 "B<getuid>(2), B<seteuid>(2), B<setfsuid>(2), B<setreuid>(2), "
9671 "B<capabilities>(7), B<credentials>(7), B<user_namespaces>(7)"
9675 #: build/C/man7/svipc.7:40
9681 #: build/C/man7/svipc.7:43
9682 msgid "svipc - System V interprocess communication mechanisms"
9686 #: build/C/man7/svipc.7:48
9689 "B<#include E<lt>sys/msg.hE<gt>>\n"
9690 "B<#include E<lt>sys/sem.hE<gt>>\n"
9691 "B<#include E<lt>sys/shm.hE<gt>>\n"
9695 #: build/C/man7/svipc.7:56
9697 "This manual page refers to the Linux implementation of the System V "
9698 "interprocess communication (IPC) mechanisms: message queues, semaphore sets, "
9699 "and shared memory segments. In the following, the word I<resource> means an "
9700 "instantiation of one among such mechanisms."
9704 #: build/C/man7/svipc.7:56
9706 msgid "Resource access permissions"
9710 #: build/C/man7/svipc.7:64
9712 "For each resource, the system uses a common structure of type I<struct "
9713 "ipc_perm> to store information needed in determining permissions to perform "
9714 "an IPC operation. The I<ipc_perm> structure includes the following members:"
9718 #: build/C/man7/svipc.7:74
9721 "struct ipc_perm {\n"
9722 " uid_t cuid; /* creator user ID */\n"
9723 " gid_t cgid; /* creator group ID */\n"
9724 " uid_t uid; /* owner user ID */\n"
9725 " gid_t gid; /* owner group ID */\n"
9726 " unsigned short mode; /* r/w permissions */\n"
9731 #: build/C/man7/svipc.7:84
9733 "The I<mode> member of the I<ipc_perm> structure defines, with its lower 9 "
9734 "bits, the access permissions to the resource for a process executing an IPC "
9735 "system call. The permissions are interpreted as follows:"
9739 #: build/C/man7/svipc.7:88
9742 " 0400 Read by user.\n"
9743 " 0200 Write by user.\n"
9747 #: build/C/man7/svipc.7:91
9750 " 0040 Read by group.\n"
9751 " 0020 Write by group.\n"
9755 #: build/C/man7/svipc.7:94
9758 " 0004 Read by others.\n"
9759 " 0002 Write by others.\n"
9763 #: build/C/man7/svipc.7:102
9765 "Bits 0100, 0010, and 0001 (the execute bits) are unused by the system. "
9766 "Furthermore, \"write\" effectively means \"alter\" for a semaphore set."
9770 #: build/C/man7/svipc.7:105
9771 msgid "The same system header file also defines the following symbolic constants:"
9775 #: build/C/man7/svipc.7:105
9777 msgid "B<IPC_CREAT>"
9781 #: build/C/man7/svipc.7:108
9782 msgid "Create entry if key doesn't exist."
9786 #: build/C/man7/svipc.7:108
9792 #: build/C/man7/svipc.7:111
9793 msgid "Fail if key exists."
9797 #: build/C/man7/svipc.7:111
9799 msgid "B<IPC_NOWAIT>"
9803 #: build/C/man7/svipc.7:114
9804 msgid "Error if request must wait."
9808 #: build/C/man7/svipc.7:114
9810 msgid "B<IPC_PRIVATE>"
9814 #: build/C/man7/svipc.7:117
9815 msgid "Private key."
9819 #: build/C/man7/svipc.7:117
9825 #: build/C/man7/svipc.7:120
9826 msgid "Remove resource."
9830 #: build/C/man7/svipc.7:120
9836 #: build/C/man7/svipc.7:123
9837 msgid "Set resource options."
9841 #: build/C/man7/svipc.7:123
9847 #: build/C/man7/svipc.7:126
9848 msgid "Get resource options."
9852 #: build/C/man7/svipc.7:135
9854 "Note that B<IPC_PRIVATE> is a I<key_t> type, while all the other symbolic "
9855 "constants are flag fields and can be OR'ed into an I<int> type variable."
9859 #: build/C/man7/svipc.7:135
9861 msgid "Message queues"
9865 #: build/C/man7/svipc.7:143
9867 "A message queue is uniquely identified by a positive integer (its I<msqid>) "
9868 "and has an associated data structure of type I<struct msqid_ds>, defined in "
9869 "I<E<lt>sys/msg.hE<gt>>, containing the following members:"
9873 #: build/C/man7/svipc.7:156
9876 "struct msqid_ds {\n"
9877 " struct ipc_perm msg_perm;\n"
9878 " msgqnum_t msg_qnum; /* no of messages on queue */\n"
9879 " msglen_t msg_qbytes; /* bytes max on a queue */\n"
9880 " pid_t msg_lspid; /* PID of last msgsnd(2) call */\n"
9881 " pid_t msg_lrpid; /* PID of last msgrcv(2) call */\n"
9882 " time_t msg_stime; /* last msgsnd(2) time */\n"
9883 " time_t msg_rtime; /* last msgrcv(2) time */\n"
9884 " time_t msg_ctime; /* last change time */\n"
9889 #: build/C/man7/svipc.7:158
9895 #: build/C/man7/svipc.7:163
9897 "I<ipc_perm> structure that specifies the access permissions on the message "
9902 #: build/C/man7/svipc.7:163
9908 #: build/C/man7/svipc.7:166
9909 msgid "Number of messages currently on the message queue."
9913 #: build/C/man7/svipc.7:166
9915 msgid "I<msg_qbytes>"
9919 #: build/C/man7/svipc.7:170
9920 msgid "Maximum number of bytes of message text allowed on the message queue."
9924 #: build/C/man7/svipc.7:170
9926 msgid "I<msg_lspid>"
9930 #: build/C/man7/svipc.7:175
9931 msgid "ID of the process that performed the last B<msgsnd>(2) system call."
9935 #: build/C/man7/svipc.7:175
9937 msgid "I<msg_lrpid>"
9941 #: build/C/man7/svipc.7:180
9942 msgid "ID of the process that performed the last B<msgrcv>(2) system call."
9946 #: build/C/man7/svipc.7:180
9948 msgid "I<msg_stime>"
9952 #: build/C/man7/svipc.7:185
9953 msgid "Time of the last B<msgsnd>(2) system call."
9957 #: build/C/man7/svipc.7:185
9959 msgid "I<msg_rtime>"
9963 #: build/C/man7/svipc.7:190
9964 msgid "Time of the last B<msgrcv>(2) system call."
9968 #: build/C/man7/svipc.7:190
9970 msgid "I<msg_ctime>"
9974 #: build/C/man7/svipc.7:196
9976 "Time of the last system call that changed a member of the I<msqid_ds> "
9981 #: build/C/man7/svipc.7:196
9983 msgid "Semaphore sets"
9987 #: build/C/man7/svipc.7:204
9989 "A semaphore set is uniquely identified by a positive integer (its I<semid>) "
9990 "and has an associated data structure of type I<struct semid_ds>, defined in "
9991 "I<E<lt>sys/sem.hE<gt>>, containing the following members:"
9995 #: build/C/man7/svipc.7:213
9998 "struct semid_ds {\n"
9999 " struct ipc_perm sem_perm;\n"
10000 " time_t sem_otime; /* last operation time */\n"
10001 " time_t sem_ctime; /* last change time */\n"
10002 " unsigned long sem_nsems; /* count of sems in set */\n"
10007 #: build/C/man7/svipc.7:215
10009 msgid "I<sem_perm>"
10012 #. type: Plain text
10013 #: build/C/man7/svipc.7:220
10015 "I<ipc_perm> structure that specifies the access permissions on the semaphore "
10020 #: build/C/man7/svipc.7:220
10022 msgid "I<sem_otime>"
10025 #. type: Plain text
10026 #: build/C/man7/svipc.7:225
10027 msgid "Time of last B<semop>(2) system call."
10031 #: build/C/man7/svipc.7:225
10033 msgid "I<sem_ctime>"
10036 #. type: Plain text
10037 #: build/C/man7/svipc.7:231
10039 "Time of last B<semctl>(2) system call that changed a member of the above "
10040 "structure or of one semaphore belonging to the set."
10044 #: build/C/man7/svipc.7:231
10046 msgid "I<sem_nsems>"
10049 #. type: Plain text
10050 #: build/C/man7/svipc.7:239
10052 "Number of semaphores in the set. Each semaphore of the set is referenced by "
10053 "a nonnegative integer ranging from B<0> to I<sem_nsems-1>."
10056 #. type: Plain text
10057 #: build/C/man7/svipc.7:243
10059 "A semaphore is a data structure of type I<struct sem> containing the "
10060 "following members:"
10063 #. unsigned short semncnt; /* nr awaiting semval to increase */
10064 #. unsigned short semzcnt; /* nr awaiting semval = 0 */
10065 #. type: Plain text
10066 #: build/C/man7/svipc.7:252
10070 " int semval; /* semaphore value */\n"
10071 " int sempid; /* PID for last operation */\n"
10076 #: build/C/man7/svipc.7:254
10081 #. type: Plain text
10082 #: build/C/man7/svipc.7:257
10083 msgid "Semaphore value: a nonnegative integer."
10087 #: build/C/man7/svipc.7:257
10094 #. Number of processes suspended awaiting for
10099 #. Number of processes suspended awaiting for
10102 #. type: Plain text
10103 #: build/C/man7/svipc.7:271
10105 "ID of the last process that performed a semaphore operation on this "
10110 #: build/C/man7/svipc.7:271
10112 msgid "Shared memory segments"
10115 #. type: Plain text
10116 #: build/C/man7/svipc.7:279
10118 "A shared memory segment is uniquely identified by a positive integer (its "
10119 "I<shmid>) and has an associated data structure of type I<struct shmid_ds>, "
10120 "defined in I<E<lt>sys/shm.hE<gt>>, containing the following members:"
10123 #. type: Plain text
10124 #: build/C/man7/svipc.7:292
10127 "struct shmid_ds {\n"
10128 " struct ipc_perm shm_perm;\n"
10129 " size_t shm_segsz; /* size of segment */\n"
10130 " pid_t shm_cpid; /* PID of creator */\n"
10131 " pid_t shm_lpid; /* PID, last operation */\n"
10132 " shmatt_t shm_nattch; /* no. of current attaches */\n"
10133 " time_t shm_atime; /* time of last attach */\n"
10134 " time_t shm_dtime; /* time of last detach */\n"
10135 " time_t shm_ctime; /* time of last change */\n"
10140 #: build/C/man7/svipc.7:294
10142 msgid "I<shm_perm>"
10145 #. type: Plain text
10146 #: build/C/man7/svipc.7:299
10148 "I<ipc_perm> structure that specifies the access permissions on the shared "
10153 #: build/C/man7/svipc.7:299
10155 msgid "I<shm_segsz>"
10158 #. type: Plain text
10159 #: build/C/man7/svipc.7:302
10160 msgid "Size in bytes of the shared memory segment."
10164 #: build/C/man7/svipc.7:302
10166 msgid "I<shm_cpid>"
10169 #. type: Plain text
10170 #: build/C/man7/svipc.7:305
10171 msgid "ID of the process that created the shared memory segment."
10175 #: build/C/man7/svipc.7:305
10177 msgid "I<shm_lpid>"
10180 #. type: Plain text
10181 #: build/C/man7/svipc.7:312
10183 "ID of the last process that executed a B<shmat>(2) or B<shmdt>(2) system "
10188 #: build/C/man7/svipc.7:312
10190 msgid "I<shm_nattch>"
10193 #. type: Plain text
10194 #: build/C/man7/svipc.7:315
10195 msgid "Number of current alive attaches for this shared memory segment."
10199 #: build/C/man7/svipc.7:315
10201 msgid "I<shm_atime>"
10204 #. type: Plain text
10205 #: build/C/man7/svipc.7:320
10206 msgid "Time of the last B<shmat>(2) system call."
10210 #: build/C/man7/svipc.7:320
10212 msgid "I<shm_dtime>"
10215 #. type: Plain text
10216 #: build/C/man7/svipc.7:325
10217 msgid "Time of the last B<shmdt>(2) system call."
10221 #: build/C/man7/svipc.7:325
10223 msgid "I<shm_ctime>"
10226 #. type: Plain text
10227 #: build/C/man7/svipc.7:331
10228 msgid "Time of the last B<shmctl>(2) system call that changed I<shmid_ds>."
10232 #: build/C/man7/svipc.7:331
10234 msgid "IPC namespaces"
10237 #. type: Plain text
10238 #: build/C/man7/svipc.7:335
10240 "For a discussion of the interaction of System V IPC objects and IPC "
10241 "namespaces, see B<namespaces>(7)."
10244 #. type: Plain text
10245 #: build/C/man7/svipc.7:352
10247 "B<ipcmk>(1), B<ipcrm>(1), B<ipcs>(1), B<ipc>(2), B<msgctl>(2), B<msgget>(2), "
10248 "B<msgrcv>(2), B<msgsnd>(2), B<semctl>(2), B<semget>(2), B<semop>(2), "
10249 "B<shmat>(2), B<shmctl>(2), B<shmdt>(2), B<shmget>(2), B<ftok>(3), "
10254 #: build/C/man3/ulimit.3:27
10260 #: build/C/man3/ulimit.3:27
10265 #. type: Plain text
10266 #: build/C/man3/ulimit.3:30
10267 msgid "ulimit - get and set user limits"
10270 #. type: Plain text
10271 #: build/C/man3/ulimit.3:32
10272 msgid "B<#include E<lt>ulimit.hE<gt>>"
10275 #. type: Plain text
10276 #: build/C/man3/ulimit.3:34
10277 msgid "B<long ulimit(int >I<cmd>B<, long >I<newlimit>B<);>"
10280 #. type: Plain text
10281 #: build/C/man3/ulimit.3:46
10283 "Warning: This routine is obsolete. Use B<getrlimit>(2), B<setrlimit>(2), "
10284 "and B<sysconf>(3) instead. For the shell command B<ulimit>(), see "
10288 #. type: Plain text
10289 #: build/C/man3/ulimit.3:53
10291 "The B<ulimit>() call will get or set some limit for the calling process. "
10292 "The I<cmd> argument can have one of the following values."
10296 #: build/C/man3/ulimit.3:53
10298 msgid "B<UL_GETFSIZE>"
10301 #. type: Plain text
10302 #: build/C/man3/ulimit.3:56
10303 msgid "Return the limit on the size of a file, in units of 512 bytes."
10307 #: build/C/man3/ulimit.3:56
10309 msgid "B<UL_SETFSIZE>"
10312 #. type: Plain text
10313 #: build/C/man3/ulimit.3:59
10314 msgid "Set the limit on the size of a file."
10318 #: build/C/man3/ulimit.3:59
10323 #. type: Plain text
10324 #: build/C/man3/ulimit.3:63
10326 "(Not implemented for Linux.) Return the maximum possible address of the "
10331 #: build/C/man3/ulimit.3:63
10336 #. type: Plain text
10337 #: build/C/man3/ulimit.3:67
10339 "(Implemented but no symbolic constant provided.) Return the maximum number "
10340 "of files that the calling process can open."
10343 #. type: Plain text
10344 #: build/C/man3/ulimit.3:74
10346 "On success, B<ulimit>() returns a nonnegative value. On error, -1 is "
10347 "returned, and I<errno> is set appropriately."
10350 #. type: Plain text
10351 #: build/C/man3/ulimit.3:78
10352 msgid "An unprivileged process tried to increase a limit."
10355 #. type: Plain text
10356 #: build/C/man3/ulimit.3:83
10357 msgid "SVr4, POSIX.1-2001. POSIX.1-2008 marks B<ulimit>() as obsolete."
10360 #. type: Plain text
10361 #: build/C/man3/ulimit.3:87
10362 msgid "B<bash>(1), B<getrlimit>(2), B<setrlimit>(2), B<sysconf>(3)"
10366 #: build/C/man7/user_namespaces.7:27
10368 msgid "USER_NAMESPACES"
10371 #. type: Plain text
10372 #: build/C/man7/user_namespaces.7:30
10373 msgid "user_namespaces - overview of Linux user namespaces"
10376 #. FIXME: This page says very little about the interaction
10377 #. of user namespaces and keys. Add something on this topic.
10379 #. ============================================================
10380 #. type: Plain text
10381 #: build/C/man7/user_namespaces.7:56
10383 "User namespaces isolate security-related identifiers and attributes, in "
10384 "particular, user IDs and group IDs (see B<credentials>(7)), the root "
10385 "directory, keys (see B<keyctl>(2)), and capabilities (see "
10386 "B<capabilities>(7)). A process's user and group IDs can be different inside "
10387 "and outside a user namespace. In particular, a process can have a normal "
10388 "unprivileged user ID outside a user namespace while at the same time having "
10389 "a user ID of 0 inside the namespace; in other words, the process has full "
10390 "privileges for operations inside the user namespace, but is unprivileged for "
10391 "operations outside the namespace."
10395 #: build/C/man7/user_namespaces.7:56
10397 msgid "Nested namespaces, namespace membership"
10400 #. type: Plain text
10401 #: build/C/man7/user_namespaces.7:69
10403 "User namespaces can be nested; that is, each user namespace\\(emexcept the "
10404 "initial (\"root\") namespace\\(emhas a parent user namespace, and can have "
10405 "zero or more child user namespaces. The parent user namespace is the user "
10406 "namespace of the process that creates the user namespace via a call to "
10407 "B<unshare>(2) or B<clone>(2) with the B<CLONE_NEWUSER> flag."
10410 #. commit 8742f229b635bf1c1c84a3dfe5e47c814c20b5c8
10411 #. FIXME Explain the rationale for this limit. (What is the rationale?)
10412 #. type: Plain text
10413 #: build/C/man7/user_namespaces.7:80
10415 "The kernel imposes (since version 3.11) a limit of 32 nested levels of user "
10416 "namespaces. Calls to B<unshare>(2) or B<clone>(2) that would cause this "
10417 "limit to be exceeded fail with the error B<EUSERS>."
10420 #. type: Plain text
10421 #: build/C/man7/user_namespaces.7:95
10423 "Each process is a member of exactly one user namespace. A process created "
10424 "via B<fork>(2) or B<clone>(2) without the B<CLONE_NEWUSER> flag is a "
10425 "member of the same user namespace as its parent. A single-threaded process "
10426 "can join another user namespace with B<setns>(2) if it has the "
10427 "B<CAP_SYS_ADMIN> capability in that namespace; upon doing so, it gains a full set of "
10428 "capabilities in that namespace."
10432 #. ============================================================
10433 #. type: Plain text
10434 #: build/C/man7/user_namespaces.7:110
10436 "A call to B<clone>(2) or B<unshare>(2) with the B<CLONE_NEWUSER> flag "
10437 "makes the new child process (for B<clone>(2)) or the caller (for "
10438 "B<unshare>(2)) a member of the new user namespace created by the call."
10442 #: build/C/man7/user_namespaces.7:110
10444 msgid "Capabilities"
10447 #. type: Plain text
10448 #: build/C/man7/user_namespaces.7:132
10450 "The child process created by B<clone>(2) with the B<CLONE_NEWUSER> flag "
10451 "starts out with a complete set of capabilities in the new user namespace. "
10452 "Likewise, a process that creates a new user namespace using B<unshare>(2) "
10453 "or joins an existing user namespace using B<setns>(2) gains a full set of "
10454 "capabilities in that namespace. On the other hand, that process has no "
10455 "capabilities in the parent (in the case of B<clone>(2)) or previous (in the "
10456 "case of B<unshare>(2) and B<setns>(2)) user namespace, even if the new "
10457 "namespace is created or joined by the root user (i.e., a process with user "
10458 "ID 0 in the root namespace)."
10461 #. type: Plain text
10462 #: build/C/man7/user_namespaces.7:142
10464 "Note that a call to B<execve>(2) will cause a process's capabilities to be "
10465 "recalculated in the usual way (see B<capabilities>(7)), so that usually, "
10466 "unless it has a user ID of 0 within the namespace or the executable file has "
10467 "a nonempty inheritable capabilities mask, it will lose all capabilities. "
10468 "See the discussion of user and group ID mappings, below."
10471 #. type: Plain text
10472 #: build/C/man7/user_namespaces.7:167
10474 "A call to B<clone>(2), B<unshare>(2), or B<setns>(2) using the "
10475 "B<CLONE_NEWUSER> flag sets the \"securebits\" flags (see B<capabilities>(7)) "
10476 "to their default values (all flags disabled) in the child (for B<clone>(2)) "
10477 "or caller (for B<unshare>(2), or B<setns>(2)). Note that because the caller "
10478 "no longer has capabilities in its original user namespace after a call to "
10479 "B<setns>(2), it is not possible for a process to reset its \"securebits\" "
10480 "flags while retaining its user namespace membership by using a pair of "
10481 "B<setns>(2) calls to move to another user namespace and then return to its "
10482 "original user namespace."
10485 #. type: Plain text
10486 #: build/C/man7/user_namespaces.7:173
10488 "Having a capability inside a user namespace permits a process to perform "
10489 "operations (that require privilege) only on resources governed by that "
10490 "namespace. The rules for determining whether or not a process has a "
10491 "capability in a particular user namespace are as follows:"
10494 #. In the 3.8 sources, see security/commoncap.c::cap_capable():
10495 #. type: Plain text
10496 #: build/C/man7/user_namespaces.7:189
10498 "A process has a capability inside a user namespace if it is a member of that "
10499 "namespace and it has the capability in its effective capability set. A "
10500 "process can gain capabilities in its effective capability set in various "
10501 "ways. For example, it may execute a set-user-ID program or an executable "
10502 "with associated file capabilities. In addition, a process may gain "
10503 "capabilities via the effect of B<clone>(2), B<unshare>(2), or B<setns>(2), "
10504 "as already described."
10507 #. type: Plain text
10508 #: build/C/man7/user_namespaces.7:193
10510 "If a process has a capability in a user namespace, then it has that "
10511 "capability in all child (and further removed descendant) namespaces as "
10515 #. * The owner of the user namespace in the parent of the
10516 #. * user namespace has all caps.
10517 #. (and likewise associates the effective group ID of the creating process
10518 #. with the namespace).
10519 #. See kernel commit 520d9eabce18edfef76a60b7b839d54facafe1f9 for a fix
10521 #. This includes the case where the process executes a set-user-ID
10522 #. program that confers the effective UID of the creator of the namespace.
10524 #. ============================================================
10525 #. type: Plain text
10526 #: build/C/man7/user_namespaces.7:214
10528 "When a user namespace is created, the kernel records the effective user ID "
10529 "of the creating process as being the \"owner\" of the namespace. A process "
10530 "that resides in the parent of the user namespace and whose effective user ID "
10531 "matches the owner of the namespace has all capabilities in the namespace. "
10532 "By virtue of the previous rule, this means that the process has all "
10533 "capabilities in all further removed descendant user namespaces as well."
10537 #: build/C/man7/user_namespaces.7:214
10539 msgid "Interaction of user namespaces and other types of namespaces"
10542 #. type: Plain text
10543 #: build/C/man7/user_namespaces.7:219
10545 "Starting in Linux 3.8, unprivileged processes can create user namespaces, "
10546 "and mount, PID, IPC, network, and UTS namespaces can be created with just "
10547 "the B<CAP_SYS_ADMIN> capability in the caller's user namespace."
10550 #. type: Plain text
10551 #: build/C/man7/user_namespaces.7:225
10553 "When a non-user-namespace is created, it is owned by the user namespace in "
10554 "which the creating process was a member at the time of the creation of the "
10555 "namespace. Actions on the non-user-namespace require capabilities in the "
10556 "corresponding user namespace."
10559 #. type: Plain text
10560 #: build/C/man7/user_namespaces.7:242
10562 "If B<CLONE_NEWUSER> is specified along with other B<CLONE_NEW*> flags in a "
10563 "single B<clone>(2) or B<unshare>(2) call, the user namespace is guaranteed "
10564 "to be created first, giving the child (B<clone>(2)) or caller "
10565 "(B<unshare>(2)) privileges over the remaining namespaces created by the "
10566 "call. Thus, it is possible for an unprivileged caller to specify this "
10567 "combination of flags."
10571 #. ============================================================
10572 #. type: Plain text
10573 #: build/C/man7/user_namespaces.7:258
10575 "When a new IPC, mount, network, PID, or UTS namespace is created via "
10576 "B<clone>(2) or B<unshare>(2), the kernel records the user namespace of the "
10577 "creating process against the new namespace. (This association can't be "
10578 "changed.) When a process in the new namespace subsequently performs "
10579 "privileged operations that operate on global resources isolated by the "
10580 "namespace, the permission checks are performed according to the process's "
10581 "capabilities in the user namespace that the kernel associated with the new "
10586 #: build/C/man7/user_namespaces.7:258
10588 msgid "Restrictions on mount namespaces"
10591 #. type: Plain text
10592 #: build/C/man7/user_namespaces.7:261
10593 msgid "Note the following points with respect to mount namespaces:"
10596 #. type: Plain text
10597 #: build/C/man7/user_namespaces.7:266
10599 "A mount namespace has an owner user namespace. A mount namespace whose "
10600 "owner user namespace is different from the owner user namespace of its "
10601 "parent mount namespace is considered a less privileged mount namespace."
10604 #. type: Plain text
10605 #: build/C/man7/user_namespaces.7:272
10607 "When creating a less privileged mount namespace, shared mounts are reduced "
10608 "to slave mounts. This ensures that mappings performed in less privileged "
10609 "mount namespaces will not propagate to more privileged mount namespaces."
10613 #. What does "come as a single unit from more privileged mount" mean?
10614 #. type: Plain text
10615 #: build/C/man7/user_namespaces.7:285
10617 "Mounts that come as a single unit from a more privileged mount namespace are locked "
10618 "together and may not be separated in a less privileged mount namespace. "
10619 "(The B<unshare>(2) B<CLONE_NEWNS> operation brings across all of the mounts "
10620 "from the original mount namespace as a single unit, and recursive mounts "
10621 "that propagate between mount namespaces propagate as a single unit.)"
10624 #. commit 9566d6742852c527bf5af38af5cbb878dad75705
10625 #. Author: Eric W. Biederman <ebiederm@xmission.com>
10626 #. Date: Mon Jul 28 17:26:07 2014 -0700
10628 #. mnt: Correct permission checks in do_remount
10629 #. type: Plain text
10630 #: build/C/man7/user_namespaces.7:306
10632 "The B<mount>(2) flags B<MS_RDONLY>, B<MS_NOSUID>, B<MS_NOEXEC>, and the "
10633 "\"atime\" flags (B<MS_NOATIME>, B<MS_NODIRATIME>, B<MS_RELATIME>) settings "
10634 "become locked when propagated from a more privileged to a less privileged "
10635 "mount namespace, and may not be changed in the less privileged mount "
10639 #. (As of 3.18-rc1 (in Al Viro's 2014-08-30 vfs.git#for-next tree))
10640 #. type: Plain text
10641 #: build/C/man7/user_namespaces.7:313
10643 "A file or directory that is a mount point in one namespace that is not a "
10644 "mount point in another namespace, may be renamed, unlinked, or removed "
10645 "(B<rmdir>(2)) in the mount namespace in which it is not a mount point "
10646 "(subject to the usual permission checks)."
10650 #. ============================================================
10651 #. type: Plain text
10652 #: build/C/man7/user_namespaces.7:324
10654 "Previously, attempting to unlink, rename, or remove a file or directory that "
10655 "was a mount point in another mount namespace would result in the error "
10656 "B<EBUSY>. That behavior had technical problems of enforcement (e.g., for "
10657 "NFS) and permitted denial-of-service attacks against more privileged "
10658 "users (i.e., preventing individual files from being updated by bind "
10659 "mounting on top of them)."
10663 #: build/C/man7/user_namespaces.7:324
10665 msgid "User and group ID mappings: uid_map and gid_map"
10668 #. commit 22d917d80e842829d0ca0a561967d728eb1d6303
10669 #. type: Plain text
10670 #: build/C/man7/user_namespaces.7:339
10672 "When a user namespace is created, it starts out without a mapping of user "
10673 "IDs (group IDs) to the parent user namespace. The I</proc/[pid]/uid_map> "
10674 "and I</proc/[pid]/gid_map> files (available since Linux 3.5) expose the "
10675 "mappings for user and group IDs inside the user namespace for the process "
10676 "I<pid>. These files can be read to view the mappings in a user namespace "
10677 "and written to (once) to define the mappings."
10680 #. type: Plain text
10681 #: build/C/man7/user_namespaces.7:345
10683 "The description in the following paragraphs explains the details for "
10684 "I<uid_map>; I<gid_map> is exactly the same, but each instance of \"user ID\" "
10685 "is replaced by \"group ID\"."
10688 #. type: Plain text
10689 #: build/C/man7/user_namespaces.7:359
10691 "The I<uid_map> file exposes the mapping of user IDs from the user namespace "
10692 "of the process I<pid> to the user namespace of the process that opened "
10693 "I<uid_map> (but see a qualification to this point below). In other words, "
10694 "processes that are in different user namespaces will potentially see "
10695 "different values when reading from a particular I<uid_map> file, depending "
10696 "on the user ID mappings for the user namespaces of the reading processes."
10699 #. type: Plain text
10700 #: build/C/man7/user_namespaces.7:371
10702 "Each line in the I<uid_map> file specifies a 1-to-1 mapping of a range of "
10703 "contiguous user IDs between two user namespaces. (When a user namespace is "
10704 "first created, this file is empty.) The specification in each line takes "
10705 "the form of three numbers delimited by white space. The first two numbers "
10706 "specify the starting user ID in each of the two user namespaces. The third "
10707 "number specifies the length of the mapped range. In detail, the fields are "
10708 "interpreted as follows:"
10711 #. type: Plain text
10712 #: build/C/man7/user_namespaces.7:375
10714 "The start of the range of user IDs in the user namespace of the process "
10718 #. type: Plain text
10719 #: build/C/man7/user_namespaces.7:383
10721 "The start of the range of user IDs to which the user IDs specified by field "
10722 "one map. How field two is interpreted depends on whether the process that "
10723 "opened I<uid_map> and the process I<pid> are in the same user namespace, as "
10727 #. type: Plain text
10728 #: build/C/man7/user_namespaces.7:389
10730 "If the two processes are in different user namespaces: field two is the "
10731 "start of a range of user IDs in the user namespace of the process that "
10732 "opened I<uid_map>."
10735 #. type: Plain text
10736 #: build/C/man7/user_namespaces.7:400
10738 "If the two processes are in the same user namespace: field two is the start "
10739 "of the range of user IDs in the parent user namespace of the process "
10740 "I<pid>. This case enables the opener of I<uid_map> (the common case here is "
10741 "opening I</proc/self/uid_map>) to see the mapping of user IDs into the user "
10742 "namespace of the process that created this user namespace."
10745 #. type: Plain text
10746 #: build/C/man7/user_namespaces.7:404
10748 "The length of the range of user IDs that is mapped between the two user "
10752 #. type: Plain text
10753 #: build/C/man7/user_namespaces.7:411
10755 "System calls that return user IDs (group IDs)\\(emfor example, B<getuid>(2), "
10756 "B<getgid>(2), and the credential fields in the structure returned by "
10757 "B<stat>(2)\\(emreturn the user ID (group ID) mapped into the caller's user "
10761 #. type: Plain text
10762 #: build/C/man7/user_namespaces.7:419
10764 "When a process accesses a file, its user and group IDs are mapped into the "
10765 "initial user namespace for the purpose of permission checking and assigning "
10766 "IDs when creating a file. When a process retrieves file user and group IDs "
10767 "via B<stat>(2), the IDs are mapped in the opposite direction, to produce "
10768 "values relative to the process user and group ID mappings."
10771 #. type: Plain text
10772 #: build/C/man7/user_namespaces.7:428
10774 "The initial user namespace has no parent namespace, but, for consistency, "
10775 "the kernel provides dummy user and group ID mapping files for this "
10776 "namespace. Looking at the I<uid_map> file (I<gid_map> is the same) from a "
10777 "shell in the initial namespace shows:"
10780 #. type: Plain text
10781 #: build/C/man7/user_namespaces.7:433
10784 "$ B<cat /proc/$$/uid_map>\n"
10785 " 0 0 4294967295\n"
10789 #. ============================================================
10790 #. type: Plain text
10791 #: build/C/man7/user_namespaces.7:453
10793 "This mapping tells us that the range starting at user ID 0 in this namespace "
10794 "maps to a range starting at 0 in the (nonexistent) parent namespace, and the "
10795 "length of the range is the largest 32-bit unsigned integer. This leaves "
10796 "4294967295 (the 32-bit signed -1 value) unmapped. This "
10797 "is deliberate: I<(uid_t)\\ -\\1> is used in several interfaces (e.g., "
10798 "B<setreuid>(2)) as a way to specify \"no user ID\". Leaving I<(uid_t)\\ "
10799 "-\\1> unmapped and unusable guarantees that there will be no confusion when "
10800 "using these interfaces."
10804 #: build/C/man7/user_namespaces.7:453
10806 msgid "Defining user and group ID mappings: writing to uid_map and gid_map"
10809 #. type: Plain text
10810 #: build/C/man7/user_namespaces.7:469
10812 "After the creation of a new user namespace, the I<uid_map> file of I<one> of "
10813 "the processes in the namespace may be written to I<once> to define the "
10814 "mapping of user IDs in the new user namespace. An attempt to write more "
10815 "than once to a I<uid_map> file in a user namespace fails with the error "
10816 "B<EPERM>. Similar rules apply for I<gid_map> files."
10819 #. type: Plain text
10820 #: build/C/man7/user_namespaces.7:474
10822 "The lines written to I<uid_map> (I<gid_map>) must conform to the following "
10826 #. type: Plain text
10827 #: build/C/man7/user_namespaces.7:477
10829 "The three fields must be valid numbers, and the last field must be greater "
10833 #. type: Plain text
10834 #: build/C/man7/user_namespaces.7:479
10835 msgid "Lines are terminated by newline characters."
10838 #. FIXME(Eric): the restriction "less than" rather than "less than or equal"
10839 #. seems strangely arbitrary. Furthermore, the comment does not agree
10840 #. with the code in kernel/user_namespace.c. Which is correct?
10841 #. type: Plain text
10842 #: build/C/man7/user_namespaces.7:492
10844 "There is an (arbitrary) limit on the number of lines in the file. As at "
10845 "Linux 3.8, the limit is five lines. In addition, the number of bytes "
10846 "written to the file must be less than the system page size, and the write "
10847 "must be performed at the start of the file (i.e., B<lseek>(2) and "
10848 "B<pwrite>(2) can't be used to write to nonzero offsets in the file)."
10851 #. commit 0bd14b4fd72afd5df41e9fd59f356740f22fceba
10852 #. type: Plain text
10853 #: build/C/man7/user_namespaces.7:505
10855 "The range of user IDs (group IDs) specified in each line cannot overlap "
10856 "with the ranges in any other lines. In the initial implementation (Linux "
10857 "3.8), this requirement was satisfied by a simplistic implementation that "
10858 "imposed the further requirement that the values in both field 1 and field 2 "
10859 "of successive lines must be in ascending numerical order, which prevented "
10860 "some otherwise valid maps from being created. Linux 3.9 and later fix this "
10861 "limitation, allowing any valid set of nonoverlapping maps."
10864 #. type: Plain text
10865 #: build/C/man7/user_namespaces.7:507
10866 msgid "At least one line must be written to the file."
10869 #. type: Plain text
10870 #: build/C/man7/user_namespaces.7:510
10871 msgid "Writes that violate the above rules fail with the error B<EINVAL>."
10874 #. type: Plain text
10875 #: build/C/man7/user_namespaces.7:515
10877 "In order for a process to write to the I</proc/[pid]/uid_map> "
10878 "(I</proc/[pid]/gid_map>) file, all of the following requirements must be "
10882 #. type: Plain text
10883 #: build/C/man7/user_namespaces.7:521
10885 "The writing process must have the B<CAP_SETUID> (B<CAP_SETGID>) capability "
10886 "in the user namespace of the process I<pid>."
10889 #. type: Plain text
10890 #: build/C/man7/user_namespaces.7:526
10892 "The writing process must be in either the user namespace of the process "
10893 "I<pid> or inside the parent user namespace of the process I<pid>."
10896 #. type: Plain text
10897 #: build/C/man7/user_namespaces.7:529
10899 "The mapped user IDs (group IDs) must in turn have a mapping in the parent "
10903 #. type: Plain text
10904 #: build/C/man7/user_namespaces.7:531
10905 msgid "One of the following is true:"
10908 #. type: Plain text
10909 #: build/C/man7/user_namespaces.7:541
10911 "The data written to I<uid_map> (I<gid_map>) consists of a single line that "
10912 "maps the writing process's filesystem user ID (group ID) in the parent user "
10913 "namespace to a user ID (group ID) in the user namespace. The usual case "
10914 "here is that this single line provides a mapping for the user ID of the process "
10915 "that created the namespace."
10918 #. type: Plain text
10919 #: build/C/man7/user_namespaces.7:548
10921 "The opening process has the B<CAP_SETUID> (B<CAP_SETGID>) capability in the "
10922 "parent user namespace. Thus, a privileged process can make mappings to "
10923 "arbitrary user IDs (group IDs) in the parent user namespace."
10927 #. ============================================================
10928 #. type: Plain text
10929 #: build/C/man7/user_namespaces.7:555
10930 msgid "Writes that violate the above rules fail with the error B<EPERM>."
10934 #: build/C/man7/user_namespaces.7:555
10936 msgid "Unmapped user and group IDs"
10939 #. from_kuid_munged(), from_kgid_munged()
10940 #. type: Plain text
10941 #: build/C/man7/user_namespaces.7:572
10943 "There are various places where an unmapped user ID (group ID) may be "
10944 "exposed to user space. For example, the first process in a new user "
10945 "namespace may call B<getuid>(2) before a user ID mapping has been defined "
10946 "for the namespace. In most such cases, an unmapped user ID is converted to "
10947 "the overflow user ID (group ID); the default value for the overflow user ID "
10948 "(group ID) is 65534. See the descriptions of "
10949 "I</proc/sys/kernel/overflowuid> and I</proc/sys/kernel/overflowgid> in "
10953 #. also SO_PEERCRED
10954 #. type: Plain text
10955 #: build/C/man7/user_namespaces.7:600
10957 "The cases where unmapped IDs are mapped in this fashion include system calls "
10958 "that return user IDs (B<getuid>(2), B<getgid>(2), and similar), credentials "
10959 "passed over a UNIX domain socket, credentials returned by B<stat>(2), "
10960 "B<waitid>(2), and the System V IPC \"ctl\" B<IPC_STAT> operations, "
10961 "credentials exposed by I</proc/PID/status> and the files in "
10962 "I</proc/sysvipc/*>, credentials returned via the I<si_uid> field in the "
10963 "I<siginfo_t> received with a signal (see B<sigaction>(2)), credentials "
10964 "written to the process accounting file (see B<acct>(5)), and credentials "
10965 "returned with POSIX message queue notifications (see B<mq_notify>(3))."
10968 #. from_kuid(), from_kgid()
10969 #. Also F_GETOWNER_UIDS is an exception
10971 #. ============================================================
10972 #. type: Plain text
10973 #: build/C/man7/user_namespaces.7:615
10975 "There is one notable case where unmapped user and group IDs are I<not> "
10976 "converted to the corresponding overflow ID value. When viewing a I<uid_map> "
10977 "or I<gid_map> file in which there is no mapping for the second field, that "
10978 "field is displayed as 4294967295 (-1 as an unsigned integer)."
10982 #: build/C/man7/user_namespaces.7:615
10984 msgid "Set-user-ID and set-group-ID programs"
10988 #. ============================================================
10989 #. type: Plain text
10990 #: build/C/man7/user_namespaces.7:635
10992 "When a process inside a user namespace executes a set-user-ID (set-group-ID) "
10993 "program, the process's effective user (group) ID inside the namespace is "
10994 "changed to whatever value is mapped for the user (group) ID of the file. "
10995 "However, if either the user I<or> the group ID of the file has no mapping "
10996 "inside the namespace, the set-user-ID (set-group-ID) bit is silently "
10997 "ignored: the new program is executed, but the process's effective user "
10998 "(group) ID is left unchanged. (This mirrors the semantics of executing a "
10999 "set-user-ID or set-group-ID program that resides on a filesystem that was "
11000 "mounted with the B<MS_NOSUID> flag, as described in B<mount>(2).)"
11003 #. type: Plain text
11004 #: build/C/man7/user_namespaces.7:645
11006 "When a process's user and group IDs are passed over a UNIX domain socket to "
11007 "a process in a different user namespace (see the description of "
11008 "B<SCM_CREDENTIALS> in B<unix>(7)), they are translated into the "
11009 "corresponding values as per the receiving process's user and group ID "
11014 #. ============================================================
11015 #. type: Plain text
11016 #: build/C/man7/user_namespaces.7:658
11018 "Over the years, there have been a lot of features that have been added to "
11019 "the Linux kernel that have been made available only to privileged users "
11020 "because of their potential to confuse set-user-ID-root applications. In "
11021 "general, it becomes safe to allow the root user in a user namespace to use "
11022 "those features because it is impossible, while in a user namespace, to gain "
11023 "more privilege than the root user of a user namespace has."
11027 #: build/C/man7/user_namespaces.7:658
11029 msgid "Availability"
11032 #. type: Plain text
11033 #: build/C/man7/user_namespaces.7:666
11035 "Use of user namespaces requires a kernel that is configured with the "
11036 "B<CONFIG_USER_NS> option. User namespaces require support in a range of "
11037 "subsystems across the kernel. When an unsupported subsystem is configured "
11038 "into the kernel, it is not possible to configure user namespaces support."
11041 #. commit d6970d4b726cea6d7a9bc4120814f95c09571fc3
11042 #. type: Plain text
11043 #: build/C/man7/user_namespaces.7:677
11045 "As at Linux 3.8, most relevant subsystems supported user namespaces, but a "
11046 "number of filesystems did not have the infrastructure needed to map user and "
11047 "group IDs between user namespaces. Linux 3.9 added the required "
11048 "infrastructure support for many of the remaining unsupported filesystems "
11049 "(Plan 9 (9P), Andrew File System (AFS), Ceph, CIFS, CODA, NFS, and OCFS2). "
11050 "Linux 3.11 added support for the last of the unsupported major filesystems, XFS."
11053 #. type: Plain text
11054 #: build/C/man7/user_namespaces.7:686
11056 "The program below is designed to allow experimenting with user namespaces, "
11057 "as well as other types of namespaces. It creates namespaces as specified by "
11058 "command-line options and then executes a command inside those namespaces. "
11059 "The comments and I<usage()> function inside the program provide a full "
11060 "explanation of the program. The following shell session demonstrates its "
11064 #. type: Plain text
11065 #: build/C/man7/user_namespaces.7:688
11066 msgid "First, we look at the run-time environment:"
11069 #. type: Plain text
11070 #: build/C/man7/user_namespaces.7:697
11073 "$ B<uname -rs> # Need Linux 3.8 or later\n"
11075 "$ B<id -u> # Running as unprivileged user\n"
11081 #. type: Plain text
11082 #: build/C/man7/user_namespaces.7:711
11084 "Now start a new shell in new user (I<-U>), mount (I<-m>), and PID (I<-p>) "
11085 "namespaces, with user ID (I<-M>) and group ID (I<-G>) 1000 mapped to 0 "
11086 "inside the user namespace:"
11089 #. type: Plain text
11090 #: build/C/man7/user_namespaces.7:715
11092 msgid "$ B<./userns_child_exec -p -m -U -M '0 1000 1' -G '0 1000 1' bash>\n"
11095 #. type: Plain text
11096 #: build/C/man7/user_namespaces.7:720
11098 "The shell has PID 1, because it is the first process in the new PID "
11102 #. type: Plain text
11103 #: build/C/man7/user_namespaces.7:725
11106 "bash$ B<echo $$>\n"
11110 #. type: Plain text
11111 #: build/C/man7/user_namespaces.7:730
11113 "Inside the user namespace, the shell has user and group ID 0, and a full set "
11114 "of permitted and effective capabilities:"
11117 #. type: Plain text
11118 #: build/C/man7/user_namespaces.7:740
11121 "bash$ B<cat /proc/$$/status | egrep '^[UG]id'>\n"
11122 "Uid:\t0\t0\t0\t0\n"
11123 "Gid:\t0\t0\t0\t0\n"
11124 "bash$ B<cat /proc/$$/status | egrep '^Cap(Prm|Inh|Eff)'>\n"
11125 "CapInh:\t0000000000000000\n"
11126 "CapPrm:\t0000001fffffffff\n"
11127 "CapEff:\t0000001fffffffff\n"
11130 #. type: Plain text
11131 #: build/C/man7/user_namespaces.7:748
11133 "Mounting a new I</proc> filesystem and listing all of the processes visible "
11134 "in the new PID namespace shows that the shell can't see any processes "
11135 "outside the PID namespace:"
11138 #. type: Plain text
11139 #: build/C/man7/user_namespaces.7:756
11142 "bash$ B<mount -t proc proc /proc>\n"
11144 " PID TTY STAT TIME COMMAND\n"
11145 " 1 pts/3 S 0:00 bash\n"
11146 " 22 pts/3 R+ 0:00 ps ax\n"
11150 #: build/C/man7/user_namespaces.7:758 build/C/man2/seccomp.2:574
11152 msgid "Program source"
11155 #. type: Plain text
11156 #: build/C/man7/user_namespaces.7:762
11158 msgid "/* userns_child_exec.c\n"
11161 #. type: Plain text
11162 #: build/C/man7/user_namespaces.7:764
11164 msgid " Licensed under GNU General Public License v2 or later\n"
11167 #. type: Plain text
11168 #: build/C/man7/user_namespaces.7:780
11171 " Create a child process that executes a shell command in new\n"
11172 " namespace(s); allow UID and GID mappings to be specified when\n"
11173 " creating a user namespace.\n"
11175 "#define _GNU_SOURCE\n"
11176 "#include E<lt>sched.hE<gt>\n"
11177 "#include E<lt>unistd.hE<gt>\n"
11178 "#include E<lt>stdlib.hE<gt>\n"
11179 "#include E<lt>sys/wait.hE<gt>\n"
11180 "#include E<lt>signal.hE<gt>\n"
11181 "#include E<lt>fcntl.hE<gt>\n"
11182 "#include E<lt>stdio.hE<gt>\n"
11183 "#include E<lt>string.hE<gt>\n"
11184 "#include E<lt>limits.hE<gt>\n"
11185 "#include E<lt>errno.hE<gt>\n"
11188 #. type: Plain text
11189 #: build/C/man7/user_namespaces.7:783
11192 "/* A simple error-handling function: print an error message based\n"
11193 " on the value in \\(aqerrno\\(aq and terminate the calling process */\n"
11196 #. type: Plain text
11197 #: build/C/man7/user_namespaces.7:786
11200 "#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\e\n"
11204 #. type: Plain text
11205 #: build/C/man7/user_namespaces.7:791
11208 "struct child_args {\n"
11209 " char **argv; /* Command to be executed by child, with args */\n"
11210 " int pipe_fd[2]; /* Pipe used to synchronize parent and child */\n"
11214 #. type: Plain text
11215 #: build/C/man7/user_namespaces.7:793
11217 msgid "static int verbose;\n"
11220 #. type: Plain text
11221 #: build/C/man7/user_namespaces.7:826
11225 "usage(char *pname)\n"
11227 " fprintf(stderr, \"Usage: %s [options] cmd [arg...]\\en\\en\", pname);\n"
11228 " fprintf(stderr, \"Create a child process that executes a shell \"\n"
11229 " \"command in a new user namespace,\\en\"\n"
11230 " \"and possibly also other new namespace(s).\\en\\en\");\n"
11231 " fprintf(stderr, \"Options can be:\\en\\en\");\n"
11232 "#define fpe(str) fprintf(stderr, \" %s\", str);\n"
11233 " fpe(\"-i New IPC namespace\\en\");\n"
11234 " fpe(\"-m New mount namespace\\en\");\n"
11235 " fpe(\"-n New network namespace\\en\");\n"
11236 " fpe(\"-p New PID namespace\\en\");\n"
11237 " fpe(\"-u New UTS namespace\\en\");\n"
11238 " fpe(\"-U New user namespace\\en\");\n"
11239 " fpe(\"-M uid_map Specify UID map for user namespace\\en\");\n"
11240 " fpe(\"-G gid_map Specify GID map for user namespace\\en\");\n"
11241 " fpe(\"-z Map user\\(aqs UID and GID to 0 in user "
11242 "namespace\\en\");\n"
11243 " fpe(\" (equivalent to: -M \\(aq0 E<lt>uidE<gt> 1\\(aq -G "
11244 "\\(aq0 E<lt>gidE<gt> 1\\(aq)\\en\");\n"
11245 " fpe(\"-v Display verbose messages\\en\");\n"
11246 " fpe(\"\\en\");\n"
11247 " fpe(\"If -z, -M, or -G is specified, -U is required.\\en\");\n"
11248 " fpe(\"It is not permitted to specify both -z and either -M or "
11250 " fpe(\"\\en\");\n"
11251 " fpe(\"Map strings for -M and -G consist of records of the "
11253 " fpe(\"\\en\");\n"
11254 " fpe(\" ID-inside-ns ID-outside-ns len\\en\");\n"
11255 " fpe(\"\\en\");\n"
11256 " fpe(\"A map string can contain multiple records, separated\"\n"
11257 " \" by commas;\\en\");\n"
11258 " fpe(\"the commas are replaced by newlines before writing\"\n"
11259 " \" to map files.\\en\");\n"
11262 #. type: Plain text
11263 #: build/C/man7/user_namespaces.7:834
11266 "/* Update the mapping file \\(aqmap_file\\(aq, with the value provided in\n"
11267 " \\(aqmapping\\(aq, a string that defines a UID or GID mapping. A UID or\n"
11268 " GID mapping consists of one or more newline-delimited records\n"
11272 #. type: Plain text
11273 #: build/C/man7/user_namespaces.7:836
11275 msgid " ID-inside-ns ID-outside-ns length\n"
11278 #. type: Plain text
11279 #: build/C/man7/user_namespaces.7:841
11282 " Requiring the user to supply a string that contains newlines is\n"
11283 " of course inconvenient for command-line use. Thus, we permit the\n"
11284 " use of commas to delimit records in this string, and replace them\n"
11285 " with newlines before writing the string to the file. */\n"
11288 #. type: Plain text
11289 #: build/C/man7/user_namespaces.7:847
11293 "update_map(char *mapping, char *map_file)\n"
11296 " size_t map_len; /* Length of \\(aqmapping\\(aq */\n"
11299 #. type: Plain text
11300 #: build/C/man7/user_namespaces.7:849
11302 msgid " /* Replace commas in mapping string with newlines */\n"
11305 #. type: Plain text
11306 #: build/C/man7/user_namespaces.7:854
11309 " map_len = strlen(mapping);\n"
11310 " for (j = 0; j E<lt> map_len; j++)\n"
11311 " if (mapping[j] == \\(aq,\\(aq)\n"
11312 " mapping[j] = \\(aq\\en\\(aq;\n"
11315 #. type: Plain text
11316 #: build/C/man7/user_namespaces.7:861
11319 " fd = open(map_file, O_RDWR);\n"
11320 " if (fd == -1) {\n"
11321 " fprintf(stderr, \"ERROR: open %s: %s\\en\", map_file,\n"
11322 " strerror(errno));\n"
11323 " exit(EXIT_FAILURE);\n"
11327 #. type: Plain text
11328 #: build/C/man7/user_namespaces.7:867
11331 " if (write(fd, mapping, map_len) != map_len) {\n"
11332 " fprintf(stderr, \"ERROR: write %s: %s\\en\", map_file,\n"
11333 " strerror(errno));\n"
11334 " exit(EXIT_FAILURE);\n"
11338 #. type: Plain text
11339 #: build/C/man7/user_namespaces.7:870
11346 #. type: Plain text
11347 #: build/C/man7/user_namespaces.7:876
11350 "static int /* Start function for cloned child */\n"
11351 "childFunc(void *arg)\n"
11353 " struct child_args *args = (struct child_args *) arg;\n"
11357 #. type: Plain text
11358 #: build/C/man7/user_namespaces.7:881
11361 " /* Wait until the parent has updated the UID and GID mappings.\n"
11362 " See the comment in main(). We wait for end of file on a\n"
11363 " pipe that will be closed by the parent process once it has\n"
11364 " updated the mappings. */\n"
11367 #. type: Plain text
11368 #: build/C/man7/user_namespaces.7:890
11371 " close(args-E<gt>pipe_fd[1]); /* Close our descriptor for the write\n"
11372 " end of the pipe so that we see EOF\n"
11373 " when parent closes its descriptor */\n"
11374 " if (read(args-E<gt>pipe_fd[0], &ch, 1) != 0) {\n"
11375 " fprintf(stderr,\n"
11376 " \"Failure in child: read from pipe returned != 0\\en\");\n"
11377 " exit(EXIT_FAILURE);\n"
11381 #. type: Plain text
11382 #: build/C/man7/user_namespaces.7:892
11384 msgid " /* Execute a shell command */\n"
11387 #. type: Plain text
11388 #: build/C/man7/user_namespaces.7:897
11391 " printf(\"About to exec %s\\en\", args-E<gt>argv[0]);\n"
11392 " execvp(args-E<gt>argv[0], args-E<gt>argv);\n"
11393 " errExit(\"execvp\");\n"
11397 #. type: Plain text
11398 #: build/C/man7/user_namespaces.7:899
11400 msgid "#define STACK_SIZE (1024 * 1024)\n"
11403 #. type: Plain text
11404 #: build/C/man7/user_namespaces.7:901
11406 msgid "static char child_stack[STACK_SIZE]; /* Space for child\\(aqs stack */\n"
11409 #. type: Plain text
11410 #: build/C/man7/user_namespaces.7:912
11414 "main(int argc, char *argv[])\n"
11416 " int flags, opt, map_zero;\n"
11417 " pid_t child_pid;\n"
11418 " struct child_args args;\n"
11419 " char *uid_map, *gid_map;\n"
11420 " const int MAP_BUF_SIZE = 100;\n"
11421 " char map_buf[MAP_BUF_SIZE];\n"
11422 " char map_path[PATH_MAX];\n"
11425 #. type: Plain text
11426 #: build/C/man7/user_namespaces.7:919
11429 " /* Parse command-line options. The initial \\(aq+\\(aq character in\n"
11430 " the final getopt() argument prevents GNU-style permutation\n"
11431 " of command-line options. That\\(aqs useful, since sometimes\n"
11432 " the \\(aqcommand\\(aq to be executed by this program itself\n"
11433 " has command-line options. We don\\(aqt want getopt() to treat\n"
11434 " those as options to this program. */\n"
11437 #. type: Plain text
11438 #: build/C/man7/user_namespaces.7:940
11443 " gid_map = NULL;\n"
11444 " uid_map = NULL;\n"
11446 " while ((opt = getopt(argc, argv, \"+imnpuUM:G:zv\")) != -1) {\n"
11447 " switch (opt) {\n"
11448 " case \\(aqi\\(aq: flags |= CLONE_NEWIPC; break;\n"
11449 " case \\(aqm\\(aq: flags |= CLONE_NEWNS; break;\n"
11450 " case \\(aqn\\(aq: flags |= CLONE_NEWNET; break;\n"
11451 " case \\(aqp\\(aq: flags |= CLONE_NEWPID; break;\n"
11452 " case \\(aqu\\(aq: flags |= CLONE_NEWUTS; break;\n"
11453 " case \\(aqv\\(aq: verbose = 1; break;\n"
11454 " case \\(aqz\\(aq: map_zero = 1; break;\n"
11455 " case \\(aqM\\(aq: uid_map = optarg; break;\n"
11456 " case \\(aqG\\(aq: gid_map = optarg; break;\n"
11457 " case \\(aqU\\(aq: flags |= CLONE_NEWUSER; break;\n"
11458 " default: usage(argv[0]);\n"
11463 #. type: Plain text
11464 #: build/C/man7/user_namespaces.7:942
11466 msgid " /* -M or -G without -U is nonsensical */\n"
11469 #. type: Plain text
11470 #: build/C/man7/user_namespaces.7:947
11473 " if (((uid_map != NULL || gid_map != NULL || map_zero) &&\n"
11474 " !(flags & CLONE_NEWUSER)) ||\n"
11475 " (map_zero && (uid_map != NULL || gid_map != NULL)))\n"
11476 " usage(argv[0]);\n"
11479 #. type: Plain text
11480 #: build/C/man7/user_namespaces.7:949
11482 msgid " args.argv = &argv[optind];\n"
11485 #. type: Plain text
11486 #: build/C/man7/user_namespaces.7:959
11489 " /* We use a pipe to synchronize the parent and child, in order to\n"
11490 " ensure that the parent sets the UID and GID maps before the child\n"
11491 " calls execve(). This ensures that the child maintains its\n"
11492 " capabilities during the execve() in the common case where we\n"
11493 " want to map the child\\(aqs effective user ID to 0 in the new user\n"
11494 " namespace. Without this synchronization, the child would lose\n"
11495 " its capabilities if it performed an execve() with nonzero\n"
11496 " user IDs (see the capabilities(7) man page for details of the\n"
11497 " transformation of a process\\(aqs capabilities during execve()). */\n"
11500 #. type: Plain text
11501 #: build/C/man7/user_namespaces.7:962
11504 " if (pipe(args.pipe_fd) == -1)\n"
11505 " errExit(\"pipe\");\n"
11508 #. type: Plain text
11509 #: build/C/man7/user_namespaces.7:964
11511 msgid " /* Create the child in new namespace(s) */\n"
11514 #. type: Plain text
11515 #: build/C/man7/user_namespaces.7:969
11518 " child_pid = clone(childFunc, child_stack + STACK_SIZE,\n"
11519 " flags | SIGCHLD, &args);\n"
11520 " if (child_pid == -1)\n"
11521 " errExit(\"clone\");\n"
11524 #. type: Plain text
11525 #: build/C/man7/user_namespaces.7:971
11527 msgid " /* Parent falls through to here */\n"
11530 #. type: Plain text
11531 #: build/C/man7/user_namespaces.7:975
11535 " printf(\"%s: PID of child created by clone() is %ld\\en\",\n"
11536 " argv[0], (long) child_pid);\n"
11539 #. type: Plain text
11540 #: build/C/man7/user_namespaces.7:977
11542 msgid " /* Update the UID and GID maps in the child */\n"
11545 #. type: Plain text
11546 #: build/C/man7/user_namespaces.7:996
11549 " if (uid_map != NULL || map_zero) {\n"
11550 " snprintf(map_path, PATH_MAX, \"/proc/%ld/uid_map\",\n"
11551 " (long) child_pid);\n"
11552 " if (map_zero) {\n"
11553 " snprintf(map_buf, MAP_BUF_SIZE, \"0 %ld 1\", (long) getuid());\n"
11554 " uid_map = map_buf;\n"
11556 " update_map(uid_map, map_path);\n"
11558 " if (gid_map != NULL || map_zero) {\n"
11559 " snprintf(map_path, PATH_MAX, \"/proc/%ld/gid_map\",\n"
11560 " (long) child_pid);\n"
11561 " if (map_zero) {\n"
11562 " snprintf(map_buf, MAP_BUF_SIZE, \"0 %ld 1\", (long) getgid());\n"
11563 " gid_map = map_buf;\n"
11565 " update_map(gid_map, map_path);\n"
11569 #. type: Plain text
11570 #: build/C/man7/user_namespaces.7:999
11573 " /* Close the write end of the pipe, to signal to the child that we\n"
11574 " have updated the UID and GID maps */\n"
11577 #. type: Plain text
11578 #: build/C/man7/user_namespaces.7:1001
11580 msgid " close(args.pipe_fd[1]);\n"
11583 #. type: Plain text
11584 #: build/C/man7/user_namespaces.7:1004
11587 " if (waitpid(child_pid, NULL, 0) == -1) /* Wait for child */\n"
11588 " errExit(\"waitpid\");\n"
11591 #. type: Plain text
11592 #: build/C/man7/user_namespaces.7:1007
11596 " printf(\"%s: terminating\\en\", argv[0]);\n"
11599 #. type: Plain text
11600 #: build/C/man7/user_namespaces.7:1010
11603 " exit(EXIT_SUCCESS);\n"
11607 #. From the shadow package
11608 #. From the shadow package
11609 #. From the shadow package
11610 #. From the shadow package
11611 #. type: Plain text
11612 #: build/C/man7/user_namespaces.7:1024
11614 "B<newgidmap>(1), B<newuidmap>(1), B<clone>(2), B<setns>(2), B<unshare>(2), "
11615 "B<proc>(5), B<subgid>(5), B<subuid>(5), B<credentials>(7), "
11616 "B<capabilities>(7), B<namespaces>(7), B<pid_namespaces>(7)"
11619 #. type: Plain text
11620 #: build/C/man7/user_namespaces.7:1026
11621 msgid "The kernel source file I<Documentation/namespaces/resource-control.txt>."
11625 #: build/C/man2/seccomp.2:27
11630 #. type: Plain text
11631 #: build/C/man2/seccomp.2:30
11632 msgid "seccomp - operate on Secure Computing state of the process"
11635 #. Kees Cook noted: Anything that uses SECCOMP_RET_TRACE returns will
11636 #. need <sys/ptrace.h>
11637 #. type: Plain text
11638 #: build/C/man2/seccomp.2:39
11641 "B<#include E<lt>linux/seccomp.hE<gt>>\n"
11642 "B<#include E<lt>linux/filter.hE<gt>>\n"
11643 "B<#include E<lt>linux/audit.hE<gt>>\n"
11644 "B<#include E<lt>linux/signal.hE<gt>>\n"
11645 "B<#include E<lt>sys/ptrace.hE<gt>>\n"
11648 #. type: Plain text
11649 #: build/C/man2/seccomp.2:42
11652 "B<int seccomp(unsigned int >I<operation>B<, unsigned int >I<flags>B<, void "
11656 #. type: Plain text
11657 #: build/C/man2/seccomp.2:48
11659 "The B<seccomp>() system call operates on the Secure Computing (seccomp) "
11660 "state of the calling process."
11663 #. type: Plain text
11664 #: build/C/man2/seccomp.2:52
11665 msgid "Currently, Linux supports the following I<operation> values:"
11669 #: build/C/man2/seccomp.2:52
11671 msgid "B<SECCOMP_SET_MODE_STRICT>"
11674 #. type: Plain text
11675 #: build/C/man2/seccomp.2:66
11677 "The only system calls that the calling thread is permitted to make are "
11678 "B<read>(2), B<write>(2), B<_exit>(2), and B<sigreturn>(2). Other system "
11679 "calls result in the delivery of a B<SIGKILL> signal. Strict secure "
11680 "computing mode is useful for number-crunching applications that may need to "
11681 "execute untrusted byte code, perhaps obtained by reading from a pipe or "
11685 #. type: Plain text
11686 #: build/C/man2/seccomp.2:70
11688 "This operation is available only if the kernel is configured with "
11689 "B<CONFIG_SECCOMP> enabled."
11692 #. type: Plain text
11693 #: build/C/man2/seccomp.2:76
11694 msgid "The value of I<flags> must be 0, and I<args> must be NULL."
11697 #. type: Plain text
11698 #: build/C/man2/seccomp.2:78
11699 msgid "This operation is functionally identical to the call:"
11702 #. type: Plain text
11703 #: build/C/man2/seccomp.2:80
11705 msgid " prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT);\n"
11709 #: build/C/man2/seccomp.2:80
11711 msgid "B<SECCOMP_SET_MODE_FILTER>"
11714 #. type: Plain text
11715 #: build/C/man2/seccomp.2:95
11717 "The system calls allowed are defined by a pointer to a Berkeley Packet "
11718 "Filter (BPF) passed via I<args>. This argument is a pointer to a I<struct\\ "
11719 "sock_fprog>; it can be designed to filter arbitrary system calls and system "
11720 "call arguments. If the filter is invalid, B<seccomp>() fails, returning "
11721 "B<EINVAL> in I<errno>."
11724 #. type: Plain text
11725 #: build/C/man2/seccomp.2:107
11727 "If B<fork>(2) or B<clone>(2) is allowed by the filter, any child processes "
11728 "will be constrained to the same system call filters as the parent. If "
11729 "B<execve>(2) is allowed, the existing filters will be preserved across a "
11730 "call to B<execve>(2)."
11733 #. type: Plain text
11734 #: build/C/man2/seccomp.2:117
11736 "In order to use the B<SECCOMP_SET_MODE_FILTER> operation, either the caller "
11737 "must have the B<CAP_SYS_ADMIN> capability, or the thread must already have "
11738 "the I<no_new_privs> bit set. If that bit was not already set by an ancestor "
11739 "of this thread, the thread must make the following call:"
11742 #. type: Plain text
11743 #: build/C/man2/seccomp.2:119
11745 msgid " prctl(PR_SET_NO_NEW_PRIVS, 1);\n"
11748 #. type: Plain text
11749 #: build/C/man2/seccomp.2:138
11751 "Otherwise, the B<SECCOMP_SET_MODE_FILTER> operation will fail and return "
11752 "B<EACCES> in I<errno>. This requirement ensures that an unprivileged "
11753 "process cannot apply a malicious filter and then invoke a set-user-ID or "
11754 "other privileged program using B<execve>(2), thus potentially compromising "
11755 "that program. (Such a malicious filter might, for example, cause an attempt "
11756 "to use B<setuid>(2) to set the caller's user IDs to non-zero values to "
11757 "instead return 0 without actually making the system call. Thus, the program "
11758 "might be tricked into retaining superuser privileges in circumstances where "
11759 "it is possible to influence it to do dangerous things because it did not "
11760 "actually drop privileges.)"
11763 #. type: Plain text
11764 #: build/C/man2/seccomp.2:146
11766 "If B<prctl>(2) or B<seccomp>(2) is allowed by the attached filter, further "
11767 "filters may be added. This will increase evaluation time, but allows for "
11768 "further reduction of the attack surface during execution of a thread."
11771 #. type: Plain text
11772 #: build/C/man2/seccomp.2:152
11774 "The B<SECCOMP_SET_MODE_FILTER> operation is available only if the kernel is "
11775 "configured with B<CONFIG_SECCOMP_FILTER> enabled."
11778 #. type: Plain text
11779 #: build/C/man2/seccomp.2:156
11780 msgid "When I<flags> is 0, this operation is functionally identical to the call:"
11783 #. type: Plain text
11784 #: build/C/man2/seccomp.2:158
11786 msgid " prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, args);\n"
11789 #. type: Plain text
11790 #: build/C/man2/seccomp.2:162
11791 msgid "The recognized I<flags> are:"
11795 #: build/C/man2/seccomp.2:163
11797 msgid "B<SECCOMP_FILTER_FLAG_TSYNC>"
11800 #. type: Plain text
11801 #: build/C/man2/seccomp.2:171
11803 "When adding a new filter, synchronize all other threads of the calling "
11804 "process to the same seccomp filter tree. A \"filter tree\" is the ordered "
11805 "list of filters attached to a thread. (Attaching identical filters in "
11806 "separate B<seccomp>() calls results in different filters from this "
11810 #. type: Plain text
11811 #: build/C/man2/seccomp.2:179
11813 "If any thread cannot synchronize to the same filter tree, the call will not "
11814 "attach the new seccomp filter, and will fail, returning the first thread ID "
11815 "found that cannot synchronize. Synchronization will fail if another thread "
11816 "in the same process is in B<SECCOMP_MODE_STRICT> or if it has attached new "
11817 "seccomp filters to itself, diverging from the calling thread's filter tree."
11821 #: build/C/man2/seccomp.2:180
11826 #. type: Plain text
11827 #: build/C/man2/seccomp.2:185
11829 "When adding filters via B<SECCOMP_SET_MODE_FILTER>, I<args> points to a "
11833 #. type: Plain text
11834 #: build/C/man2/seccomp.2:193
11837 "struct sock_fprog {\n"
11838 " unsigned short len; /* Number of BPF instructions */\n"
11839 " struct sock_filter *filter; /* Pointer to array of\n"
11840 " BPF instructions */\n"
11844 #. type: Plain text
11845 #: build/C/man2/seccomp.2:197
11846 msgid "Each program must contain one or more BPF instructions:"
11849 #. type: Plain text
11850 #: build/C/man2/seccomp.2:206
11853 "struct sock_filter { /* Filter block */\n"
11854 " __u16 code; /* Actual filter code */\n"
11855 " __u8 jt; /* Jump true */\n"
11856 " __u8 jf; /* Jump false */\n"
11857 " __u32 k; /* Generic multiuse field */\n"
11861 #. type: Plain text
11862 #: build/C/man2/seccomp.2:213
11864 "When executing the instructions, the BPF program operates on the system call "
11865 "information made available (i.e., use the B<BPF_ABS> addressing mode) as a "
11866 "buffer of the following form:"
11869 #. type: Plain text
11870 #: build/C/man2/seccomp.2:223
11873 "struct seccomp_data {\n"
11874 " int nr; /* System call number */\n"
11875 " __u32 arch; /* AUDIT_ARCH_* value\n"
11876 " (see E<lt>linux/audit.hE<gt>) */\n"
11877 " __u64 instruction_pointer; /* CPU instruction pointer */\n"
11878 " __u64 args[6]; /* Up to 6 system call arguments */\n"
11882 #. type: Plain text
11883 #: build/C/man2/seccomp.2:234
11885 "A seccomp filter returns a 32-bit value consisting of two parts: the most "
11886 "significant 16 bits (corresponding to the mask defined by the constant "
11887 "B<SECCOMP_RET_ACTION>) contain one of the \"action\" values listed below; "
11888 "the least significant 16 bits (defined by the constant B<SECCOMP_RET_DATA>) "
11889 "are \"data\" to be associated with this return value."
11892 #. type: Plain text
11893 #: build/C/man2/seccomp.2:242
11895 "If multiple filters exist, they are all executed, in reverse order of their "
11896 "addition to the filter tree (i.e., the most recently installed filter is "
11897 "executed first). The return value for the evaluation of a given system call "
11898 "is the first-seen B<SECCOMP_RET_ACTION> value of highest precedence (along "
11899 "with its accompanying data) returned by execution of all of the filters."
11902 #. type: Plain text
11903 #: build/C/man2/seccomp.2:245
11905 "In decreasing order of precedence, the values that may be returned by a "
11906 "seccomp filter are:"
11910 #: build/C/man2/seccomp.2:245
11912 msgid "B<SECCOMP_RET_KILL>"
11915 #. type: Plain text
11916 #: build/C/man2/seccomp.2:254
11918 "This value results in the process exiting immediately without executing the "
11919 "system call. The process terminates as though killed by a B<SIGSYS> signal "
11920 "(I<not> B<SIGKILL>)."
11924 #: build/C/man2/seccomp.2:254
11926 msgid "B<SECCOMP_RET_TRAP>"
11929 #. type: Plain text
11930 #: build/C/man2/seccomp.2:264
11932 "This value results in the kernel sending a B<SIGSYS> signal to the "
11933 "triggering process without executing the system call. Various fields will "
11934 "be set in the I<siginfo_t> structure (see B<sigaction>(2)) associated with "
11938 #. type: Plain text
11939 #: build/C/man2/seccomp.2:269
11940 msgid "I<si_signo> will contain B<SIGSYS>."
11943 #. type: Plain text
11944 #: build/C/man2/seccomp.2:272
11945 msgid "I<si_call_addr> will show the address of the system call instruction."
11948 #. type: Plain text
11949 #: build/C/man2/seccomp.2:277
11950 msgid "I<si_syscall> and I<si_arch> will indicate which system call was attempted."
11953 #. type: Plain text
11954 #: build/C/man2/seccomp.2:281
11955 msgid "I<si_code> will contain B<SYS_SECCOMP>."
11958 #. type: Plain text
11959 #: build/C/man2/seccomp.2:286
11961 "I<si_errno> will contain the B<SECCOMP_RET_DATA> portion of the filter "
11965 #. type: Plain text
11966 #: build/C/man2/seccomp.2:295
11968 "The program counter will be as though the system call happened (i.e., it "
11969 "will not point to the system call instruction). The return value register "
11970 "will contain an architecture-dependent value; if resuming execution, set it "
11971 "to something appropriate for the system call. (The architecture dependency "
11972 "is because replacing it with B<ENOSYS> could overwrite some useful "
11977 #: build/C/man2/seccomp.2:295
11979 msgid "B<SECCOMP_RET_ERRNO>"
11982 #. type: Plain text
11983 #: build/C/man2/seccomp.2:302
11985 "This value results in the B<SECCOMP_RET_DATA> portion of the filter's return "
11986 "value being passed to user space as the I<errno> value without executing the "
11991 #: build/C/man2/seccomp.2:302
11993 msgid "B<SECCOMP_RET_TRACE>"
11996 #. type: Plain text
11997 #: build/C/man2/seccomp.2:312
11999 "When returned, this value will cause the kernel to attempt to notify a "
12000 "B<ptrace>(2)-based tracer prior to executing the system call. If there is "
12001 "no tracer present, the system call is not executed and returns a failure "
12002 "status with I<errno> set to B<ENOSYS>."
12005 #. type: Plain text
12006 #: build/C/man2/seccomp.2:323
12008 "A tracer will be notified if it requests B<PTRACE_O_TRACESECCOMP> using "
12009 "I<ptrace(PTRACE_SETOPTIONS)>. The tracer will be notified of a "
12010 "B<PTRACE_EVENT_SECCOMP> and the B<SECCOMP_RET_DATA> portion of the filter's "
12011 "return value will be available to the tracer via B<PTRACE_GETEVENTMSG>."
12014 #. type: Plain text
12015 #: build/C/man2/seccomp.2:330
12017 "The tracer can skip the system call by changing the system call number to "
12018 "-1. Alternatively, the tracer can change the system call requested by "
12019 "changing the system call to a valid system call number. If the tracer asks "
12020 "to skip the system call, then the system call will appear to return the "
12021 "value that the tracer puts in the return value register."
12024 #. type: Plain text
12025 #: build/C/man2/seccomp.2:339
12027 "The seccomp check will not be run again after the tracer is notified. (This "
12028 "means that seccomp-based sandboxes B<must not> allow use of "
12029 "B<ptrace>(2)\\(emeven of other sandboxed processes\\(emwithout extreme care; "
12030 "ptracers can use this mechanism to escape from the seccomp sandbox.)"
12034 #: build/C/man2/seccomp.2:339
12036 msgid "B<SECCOMP_RET_ALLOW>"
12039 #. type: Plain text
12040 #: build/C/man2/seccomp.2:342
12041 msgid "This value results in the system call being executed."
12044 #. type: Plain text
12045 #: build/C/man2/seccomp.2:358
12047 "On success, B<seccomp>() returns 0. On error, if "
12048 "B<SECCOMP_FILTER_FLAG_TSYNC> was used, the return value is the ID of the "
12049 "thread that caused the synchronization failure. (This ID is a kernel thread "
12050 "ID of the type returned by B<clone>(2) and B<gettid>(2).) On other errors, "
12051 "-1 is returned, and I<errno> is set to indicate the cause of the error."
12054 #. type: Plain text
12055 #: build/C/man2/seccomp.2:361
12056 msgid "B<seccomp>() can fail for the following reasons:"
12060 #: build/C/man2/seccomp.2:361
12065 #. type: Plain text
12066 #: build/C/man2/seccomp.2:369
12068 "The caller did not have the B<CAP_SYS_ADMIN> capability, or had not set "
12069 "I<no_new_privs> before using B<SECCOMP_SET_MODE_FILTER>."
12072 #. type: Plain text
12073 #: build/C/man2/seccomp.2:373
12074 msgid "I<args> was not a valid address."
12077 #. type: Plain text
12078 #: build/C/man2/seccomp.2:380
12079 msgid "I<operation> is unknown; or I<flags> are invalid for the given I<operation>."
12082 #. type: Plain text
12083 #: build/C/man2/seccomp.2:387
12085 "I<operation> included B<BPF_ABS>, but the specified offset was not aligned "
12086 "to a 32-bit boundary or exceeded I<sizeof(struct\\ seccomp_data)>."
12089 #. See kernel/seccomp.c::seccomp_may_assign_mode() in 3.18 sources
12090 #. type: Plain text
12091 #: build/C/man2/seccomp.2:393
12093 "A secure computing mode has already been set, and I<operation> differs from "
12094 "the existing setting."
12097 #. See stub kernel/seccomp.c::seccomp_set_mode_filter() in 3.18 sources
12098 #. type: Plain text
12099 #: build/C/man2/seccomp.2:402
12101 "I<operation> specified B<SECCOMP_SET_MODE_FILTER>, but the kernel was not "
12102 "built with B<CONFIG_SECCOMP_FILTER> enabled."
12105 #. type: Plain text
12106 #: build/C/man2/seccomp.2:413
12108 "I<operation> specified B<SECCOMP_SET_MODE_FILTER>, but the filter program "
12109 "pointed to by I<args> was not valid or the length of the filter program was "
12110 "zero or exceeded B<BPF_MAXINSNS> (4096) instructions."
12113 #. ENOMEM in kernel/seccomp.c::seccomp_attach_filter() in 3.18 sources
12114 #. type: Plain text
12115 #: build/C/man2/seccomp.2:426
12117 "The total length of all filter programs attached to the calling thread would "
12118 "exceed B<MAX_INSNS_PER_PATH> (32768) instructions. Note that for the "
12119 "purposes of calculating this limit, each already existing filter program "
12120 "incurs an overhead penalty of 4 instructions."
12123 #. type: Plain text
12124 #: build/C/man2/seccomp.2:430
12126 "Another thread caused a failure during thread sync, but its ID could not be "
12130 #. FIXME . Add glibc version
12131 #. type: Plain text
12132 #: build/C/man2/seccomp.2:435
12133 msgid "The B<seccomp>() system call first appeared in Linux 3.17."
12136 #. type: Plain text
12137 #: build/C/man2/seccomp.2:439
12138 msgid "The B<seccomp>() system call is a nonstandard Linux extension."
12141 #. type: Plain text
12142 #: build/C/man2/seccomp.2:446
12144 "The I<Seccomp> field of the I</proc/[pid]/status> file provides a method of "
12145 "viewing the seccomp mode of a process; see B<proc>(5)."
12148 #. type: Plain text
12149 #: build/C/man2/seccomp.2:453
12151 "B<seccomp>() provides a superset of the functionality provided by the "
12152 "B<prctl>(2) B<PR_SET_SECCOMP> operation (which does not support I<flags>)."
12156 #: build/C/man2/seccomp.2:453
12158 msgid "Seccomp-specific BPF details"
12161 #. type: Plain text
12162 #: build/C/man2/seccomp.2:455
12163 msgid "Note the following BPF details specific to seccomp filters:"
12166 #. type: Plain text
12167 #: build/C/man2/seccomp.2:463
12169 "The B<BPF_H> and B<BPF_B> size modifiers are not supported: all operations "
12170 "must load and store (4-byte) words (B<BPF_W>)."
12173 #. type: Plain text
12174 #: build/C/man2/seccomp.2:469
12176 "To access the contents of the I<seccomp_data> buffer, use the B<BPF_ABS> "
12177 "addressing mode modifier."
12180 #. type: Plain text
12181 #: build/C/man2/seccomp.2:476
12183 "The B<BPF_LEN> addressing mode modifier yields an immediate mode operand "
12184 "whose value is the size of the I<seccomp_data> buffer."
12187 #. type: Plain text
12188 #: build/C/man2/seccomp.2:482
12190 "The program below accepts four or more arguments. The first three arguments "
12191 "are a system call number, a numeric architecture identifier, and an error "
12192 "number. The program uses these values to construct a BPF filter that is "
12193 "used at run time to perform the following checks:"
12197 #: build/C/man2/seccomp.2:482
12202 #. type: Plain text
12203 #: build/C/man2/seccomp.2:486
12205 "If the program is not running on the specified architecture, the BPF filter "
12206 "causes system calls to fail with the error B<ENOSYS>."
12210 #: build/C/man2/seccomp.2:486
12215 #. type: Plain text
12216 #: build/C/man2/seccomp.2:491
12218 "If the program attempts to execute the system call with the specified "
12219 "number, the BPF filter causes the system call to fail, with I<errno> being "
12220 "set to the specified error number."
12223 #. type: Plain text
12224 #: build/C/man2/seccomp.2:500
12226 "The remaining command-line arguments specify the pathname and additional "
12227 "arguments of a program that the example program should attempt to execute "
12228 "using B<execv>(3) (a library function that employs the B<execve>(2) "
12229 "system call). Some example runs of the program are shown below."
12232 #. type: Plain text
12233 #: build/C/man2/seccomp.2:504
12235 "First, we display the architecture that we are running on (x86-64) and then "
12236 "construct a shell function that looks up system call numbers on this "
12240 #. type: Plain text
12241 #: build/C/man2/seccomp.2:513
12246 "$ B<syscall_nr() {\n"
12247 " cat /usr/src/linux/arch/x86/syscalls/syscall_64.tbl | \\e\n"
12248 " awk '$2 != \"x32\" && $3 == \"'$1'\" { print $1 }'\n"
12252 #. type: Plain text
12253 #: build/C/man2/seccomp.2:520
12255 "When the BPF filter rejects a system call (case [2] above), it causes the "
12256 "system call to fail with the error number specified on the command line. In "
12257 "the experiments shown here, we'll use error number 99:"
12260 #. type: Plain text
12261 #: build/C/man2/seccomp.2:525
12265 "EADDRNOTAVAIL 99 Cannot assign requested address\n"
12268 #. type: Plain text
12269 #: build/C/man2/seccomp.2:533
12271 "In the following example, we attempt to run the command B<whoami>(1), but "
12272 "the BPF filter rejects the B<execve>(2) system call, so that the command is "
12273 "not even executed:"
12276 #. type: Plain text
12277 #: build/C/man2/seccomp.2:544
12280 "$ B<syscall_nr execve>\n"
12283 "Usage: ./a.out E<lt>syscall_nrE<gt> E<lt>archE<gt> E<lt>errnoE<gt> "
12284 "E<lt>progE<gt> [E<lt>argsE<gt>]\n"
12285 "Hint for E<lt>archE<gt>: AUDIT_ARCH_I386: 0x40000003\n"
12286 " AUDIT_ARCH_X86_64: 0xC000003E\n"
12287 "$ B<./a.out 59 0xC000003E 99 /bin/whoami>\n"
12288 "execv: Cannot assign requested address\n"
12291 #. type: Plain text
12292 #: build/C/man2/seccomp.2:552
12294 "In the next example, the BPF filter rejects the B<write>(2) system call, so "
12295 "that, although it is successfully started, the B<whoami>(1) command is not "
12296 "able to write output:"
12299 #. type: Plain text
12300 #: build/C/man2/seccomp.2:558
12303 "$ B<syscall_nr write>\n"
12305 "$ B<./a.out 1 0xC000003E 99 /bin/whoami>\n"
12308 #. type: Plain text
12309 #: build/C/man2/seccomp.2:565
12311 "In the final example, the BPF filter rejects a system call that is not used "
12312 "by the B<whoami>(1) command, so it is able to successfully execute and "
12316 #. type: Plain text
12317 #: build/C/man2/seccomp.2:572
12320 "$ B<syscall_nr preadv>\n"
12322 "$ B<./a.out 295 0xC000003E 99 /bin/whoami>\n"
12326 #. type: Plain text
12327 #: build/C/man2/seccomp.2:586
12330 "#include E<lt>errno.hE<gt>\n"
12331 "#include E<lt>stddef.hE<gt>\n"
12332 "#include E<lt>stdio.hE<gt>\n"
12333 "#include E<lt>stdlib.hE<gt>\n"
12334 "#include E<lt>unistd.hE<gt>\n"
12335 "#include E<lt>linux/audit.hE<gt>\n"
12336 "#include E<lt>linux/filter.hE<gt>\n"
12337 "#include E<lt>linux/seccomp.hE<gt>\n"
12338 "#include E<lt>sys/prctl.hE<gt>\n"
12341 #. type: Plain text
12342 #: build/C/man2/seccomp.2:595
12346 "install_filter(int syscall_nr, int t_arch, int f_errno)\n"
12348 " struct sock_filter filter[] = {\n"
12349 " /* [0] Load architecture from 'seccomp_data' buffer into\n"
12350 " accumulator */\n"
12351 " BPF_STMT(BPF_LD | BPF_W | BPF_ABS,\n"
12352 " (offsetof(struct seccomp_data, arch))),\n"
12355 #. type: Plain text
12356 #: build/C/man2/seccomp.2:599
12359 " /* [1] Jump forward 4 instructions if architecture does not\n"
12360 " match 't_arch' */\n"
12361 " BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, t_arch, 0, 4),\n"
12364 #. type: Plain text
12365 #: build/C/man2/seccomp.2:604
12368 " /* [2] Load system call number from 'seccomp_data' buffer into\n"
12369 " accumulator */\n"
12370 " BPF_STMT(BPF_LD | BPF_W | BPF_ABS,\n"
12371 " (offsetof(struct seccomp_data, nr))),\n"
12374 #. type: Plain text
12375 #: build/C/man2/seccomp.2:608
12378 " /* [3] Jump forward 1 instruction if system call number\n"
12379 " does not match 'syscall_nr' */\n"
12380 " BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, syscall_nr, 0, 1),\n"
12383 #. type: Plain text
12384 #: build/C/man2/seccomp.2:613
12387 " /* [4] Matching architecture and system call: don't execute\n"
12388 "\t the system call, and return 'f_errno' in 'errno' */\n"
12389 " BPF_STMT(BPF_RET | BPF_K,\n"
12390 " SECCOMP_RET_ERRNO | (f_errno & SECCOMP_RET_DATA)),\n"
12393 #. type: Plain text
12394 #: build/C/man2/seccomp.2:617
12397 " /* [5] Destination of system call number mismatch: allow other\n"
12398 " system calls */\n"
12399 " BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),\n"
12402 #. type: Plain text
12403 #: build/C/man2/seccomp.2:621
12406 " /* [6] Destination of architecture mismatch: kill process */\n"
12407 " BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL),\n"
12411 #. type: Plain text
12412 #: build/C/man2/seccomp.2:626
12415 " struct sock_fprog prog = {\n"
12416 " .len = (unsigned short) (sizeof(filter) / sizeof(filter[0])),\n"
12417 " .filter = filter,\n"
12421 #. type: Plain text
12422 #: build/C/man2/seccomp.2:631
12425 " if (seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog)) {\n"
12426 " perror(\"seccomp\");\n"
12431 #. type: Plain text
12432 #: build/C/man2/seccomp.2:634
12439 #. type: Plain text
12440 #: build/C/man2/seccomp.2:646
12444 "main(int argc, char **argv)\n"
12446 " if (argc E<lt> 5) {\n"
12447 " fprintf(stderr, \"Usage: \"\n"
12448 " \"%s E<lt>syscall_nrE<gt> E<lt>archE<gt> E<lt>errnoE<gt> "
12449 "E<lt>progE<gt> [E<lt>argsE<gt>]\\en\"\n"
12450 " \"Hint for E<lt>archE<gt>: AUDIT_ARCH_I386: 0x%X\\en\"\n"
12451 " \" AUDIT_ARCH_X86_64: 0x%X\\en\"\n"
12452 " \"\\en\", argv[0], AUDIT_ARCH_I386, AUDIT_ARCH_X86_64);\n"
12453 " exit(EXIT_FAILURE);\n"
12457 #. type: Plain text
12458 #: build/C/man2/seccomp.2:651
12461 " if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {\n"
12462 " perror(\"prctl\");\n"
12463 " exit(EXIT_FAILURE);\n"
12467 #. type: Plain text
12468 #: build/C/man2/seccomp.2:656
12471 " if (install_filter(strtol(argv[1], NULL, 0),\n"
12472 " strtol(argv[2], NULL, 0),\n"
12473 " strtol(argv[3], NULL, 0)))\n"
12474 " exit(EXIT_FAILURE);\n"
12477 #. type: Plain text
12478 #: build/C/man2/seccomp.2:661
12481 " execv(argv[4], &argv[4]);\n"
12482 " perror(\"execv\");\n"
12483 " exit(EXIT_FAILURE);\n"
12487 #. type: Plain text
12488 #: build/C/man2/seccomp.2:668
12489 msgid "B<prctl>(2), B<ptrace>(2), B<sigaction>(2), B<signal>(7), B<socket>(7)"
12492 #. type: Plain text
12493 #: build/C/man2/seccomp.2:673
12495 "The kernel source files I<Documentation/networking/filter.txt> and "
12496 "I<Documentation/prctl/seccomp_filter.txt>."
12499 #. type: Plain text
12500 #: build/C/man2/seccomp.2:678
12502 "McCanne, S. and Jacobson, V. (1992) I<The BSD Packet Filter: A New "
12503 "Architecture for User-level Packet Capture>, Proceedings of the USENIX "
12504 "Winter 1993 Conference E<.UR http://www.tcpdump.org/papers/bpf-usenix93.pdf> "