-.\" Hey Emacs! This file is -*- nroff -*- source.
-.\"
.\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992
-.\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005
+.\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005, 2013
+.\"
+.\" %%%LICENSE_START(GPL_NOVERSION_ONELINE)
.\" May be distributed under the GNU General Public License.
+.\" %%%LICENSE_END
+.\"
.\" Modified by Michael Haardt <michael@moria.de>
.\" Modified 24 Jul 1993 by Rik Faith <faith@cs.unc.edu>
.\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>:
.\" FIXME Document CLONE_NEWUSER, which is new in 2.6.23
.\" (also supported for unshare()?)
.\"
-.TH CLONE 2 2011-09-08 "Linux" "Linux Programmer's Manual"
+.TH CLONE 2 2013-01-01 "Linux" "Linux Programmer's Manual"
.SH NAME
clone, __clone2 \- create a child process
.SH SYNOPSIS
When the child process is created with
.BR clone (),
it executes the function
-application
.IR fn ( arg ).
(This differs from
.BR fork (2),
.I ctid
in child memory.
.TP
-.B CLONE_FILES
+.BR CLONE_FILES " (since Linux 2.0)"
If
.B CLONE_FILES
is set, the calling process and the child process share the same file
performed by either the calling
process or the child process do not affect the other process.
.TP
-.B CLONE_FS
+.BR CLONE_FS " (since Linux 2.0)"
If
.B CLONE_FS
is set, the caller and the child process share the same file system
the calling process.
This flag is intended for the implementation of containers.
-An IPC namespace consists of the set of identifiers for
-System V IPC objects.
-(These objects are created using
-.BR msgctl (2),
-.BR semctl (2),
-and
-.BR shmctl (2)).
+An IPC namespace provides an isolated view of System V IPC objects (see
+.BR svipc (7))
+and (since Linux 2.6.30)
+.\" commit 7eafd7c74c3f2e67c27621b987b28397110d643f
+.\" https://lwn.net/Articles/312232/
+POSIX message queues
+(see
+.BR mq_overview (7)).
+The common characteristic of these IPC mechanisms is that IPC
+objects are identified by mechanisms other than filesystem
+pathnames.
+
Objects created in an IPC namespace are visible to all other processes
that are members of that namespace,
but are not visible to processes in other IPC namespaces.
When an IPC namespace is destroyed
-(i.e, when the last process that is a member of the namespace terminates),
+(i.e., when the last process that is a member of the namespace terminates),
all IPC objects in the namespace are automatically destroyed.
Use of this flag requires: a kernel configured with the
A physical network device can live in exactly one
network namespace.
A virtual network device ("veth") pair provides a pipe-like abstraction
+.\" FIXME Add pointer to veth(4) page when it is eventually completed
that can be used to create tunnels between network namespaces,
and can be used to create a bridge to a physical network device
in another namespace.
specified only by the system boot process (PID 0).
It disappeared in Linux 2.5.16.
.TP
-.B CLONE_PTRACE
+.BR CLONE_PTRACE " (since Linux 2.2)"
If
.B CLONE_PTRACE
is specified, and the calling process is being traced,
(See
.BR set_thread_area (2).)
.TP
-.B CLONE_SIGHAND
+.BR CLONE_SIGHAND " (since Linux 2.0)"
If
.B CLONE_SIGHAND
is set, the calling process and the child process share the same table of
.B CLONE_PTRACE
on this child process.
.TP
-.B CLONE_VFORK
+.BR CLONE_VFORK " (since Linux 2.2)"
If
.B CLONE_VFORK
is set, the execution of the calling process is suspended
after the call, and an application should not rely on execution occurring
in any particular order.
.TP
-.B CLONE_VM
+.BR CLONE_VM " (since Linux 2.0)"
If
.B CLONE_VM
is set, the calling process and the child process run in the same memory
Memory writes or file mappings/unmappings performed by one of the
processes do not affect the other, as with
.BR fork (2).
-.SS "sys_clone"
+.SS sys_clone
The
.B sys_clone
system call corresponds more closely to
.BR fork (2)
in that execution in the child continues from the point of the
call.
-Thus,
-.B sys_clone
-only requires the
-.I flags
+As such, the
+.I fn
and
-.I child_stack
-arguments, which have the same meaning as for
-.BR clone ().
-(Note that the order of these arguments differs from
-.BR clone ().)
+.I arg
+arguments of the
+.BR clone ()
+wrapper function are omitted.
+Furthermore, the order of the remaining arguments differs from that of the wrapper function.
+The raw system call interface is roughly:
+.in +4
+.nf
+
+.BI "long clone(unsigned long " flags ", void *" child_stack ,
+.BI " void *" ptid ", void *" ctid ,
+.BI " struct pt_regs *" regs );
+.fi
+.in
Another difference for
.B sys_clone
is that the
In this case, for correct operation, the
.B CLONE_VM
option should not be specified.
-
+.SS Linux 2.4 and earlier
In Linux 2.4 and earlier,
.BR clone ()
does not take arguments
.IR tls ,
and
.IR ctid .
-.SH "RETURN VALUE"
+.SH RETURN VALUE
.\" gettid(2) returns current->pid;
.\" getpid(2) returns current->tgid;
On success, the thread ID of the child process is returned
glibc2 provides
.BR clone ()
as described in this manual page.
-.SH "CONFORMING TO"
+.SH CONFORMING TO
The
.BR clone ()
and
.\" See also the following bug reports
.\" https://bugzilla.redhat.com/show_bug.cgi?id=417521
.\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910
-.SH "SEE ALSO"
+.SH EXAMPLE
+.SS Create a child that executes in a separate UTS namespace
+The following program demonstrates the use of
+.BR clone ()
+to create a child process that executes in a separate UTS namespace.
+The child changes the hostname in its UTS namespace.
+Both parent and child then display the system hostname,
+making it possible to see that the hostname
+differs in the UTS namespaces of the parent and child.
+For an example of the use of this program, see
+.BR setns (2).
+
+.nf
+#define _GNU_SOURCE
+#include <sys/wait.h>
+#include <sys/utsname.h>
+#include <sched.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
+ } while (0)
+
+static int /* Start function for cloned child */
+childFunc(void *arg)
+{
+ struct utsname uts;
+
+ /* Change hostname in UTS namespace of child */
+
+ if (sethostname(arg, strlen(arg)) == \-1)
+ errExit("sethostname");
+
+ /* Retrieve and display hostname */
+
+ if (uname(&uts) == \-1)
+ errExit("uname");
+ printf("uts.nodename in child: %s\\n", uts.nodename);
+
+ /* Keep the namespace open for a while, by sleeping.
+ This allows some experimentation\-\-for example, another
+ process might join the namespace. */
+
+ sleep(200);
+
+ return 0; /* Child terminates now */
+}
+
+#define STACK_SIZE (1024 * 1024) /* Stack size for cloned child */
+
+int
+main(int argc, char *argv[])
+{
+ char *stack; /* Start of stack buffer */
+ char *stackTop; /* End of stack buffer */
+ pid_t pid;
+ struct utsname uts;
+
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s <child\-hostname>\\n", argv[0]);
+ exit(EXIT_SUCCESS);
+ }
+
+ /* Allocate stack for child */
+
+ stack = malloc(STACK_SIZE);
+ if (stack == NULL)
+ errExit("malloc");
+ stackTop = stack + STACK_SIZE; /* Assume stack grows downward */
+
+ /* Create child that has its own UTS namespace;
+ child commences execution in childFunc() */
+
+ pid = clone(childFunc, stackTop, CLONE_NEWUTS | SIGCHLD, argv[1]);
+ if (pid == \-1)
+ errExit("clone");
+ printf("clone() returned %ld\\n", (long) pid);
+
+ /* Parent falls through to here */
+
+ sleep(1); /* Give child time to change its hostname */
+
+ /* Display hostname in parent\(aqs UTS namespace. This will be
+ different from hostname in child\(aqs UTS namespace. */
+
+ if (uname(&uts) == \-1)
+ errExit("uname");
+ printf("uts.nodename in parent: %s\\n", uts.nodename);
+
+ if (waitpid(pid, NULL, 0) == \-1) /* Wait for child */
+ errExit("waitpid");
+ printf("child has terminated\\n");
+
+ exit(EXIT_SUCCESS);
+}
+.fi
+.SH SEE ALSO
.BR fork (2),
.BR futex (2),
.BR getpid (2),
.BR gettid (2),
+.BR kcmp (2),
.BR set_thread_area (2),
.BR set_tid_address (2),
+.BR setns (2),
.BR tkill (2),
.BR unshare (2),
.BR wait (2),