libpthread/linuxthreads.old/manager.c

   1 /* Linuxthreads - a simple clone()-based implementation of Posix        */
   2 /* threads for Linux.                                                   */
   3 /* Copyright (C) 1996 Xavier Leroy (Xavier.Leroy@inria.fr)              */
   4 /*                                                                      */
   5 /* This program is free software; you can redistribute it and/or        */
   6 /* modify it under the terms of the GNU Library General Public License  */
   7 /* as published by the Free Software Foundation; either version 2       */
   8 /* of the License, or (at your option) any later version.               */
   9 /*                                                                      */
  10 /* This program is distributed in the hope that it will be useful,      */
  11 /* but WITHOUT ANY WARRANTY; without even the implied warranty of       */
  12 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        */
  13 /* GNU Library General Public License for more details.                 */
  14
  15 /* The "thread manager" thread: manages creation and termination of threads */
  16
  17 #include <features.h>
  18 #include <errno.h>
  19 #include <sched.h>
  20 #include <stddef.h>
  21 #include <stdio.h>
  22 #include <stdlib.h>
  23 #include <string.h>
  24 #include <unistd.h>
  25 #include <sys/poll.h>           /* for poll */
  26 #include <sys/mman.h>           /* for mmap */
  27 #include <sys/param.h>
  28 #include <sys/time.h>
  29 #include <sys/wait.h>           /* for waitpid macros */
  30
  31 #include "pthread.h"
  32 #include "internals.h"
  33 #include "spinlock.h"
  34 #include "restart.h"
  35 #include "semaphore.h"
  36 #include "debug.h" /* PDEBUG, added by StS */
  37
  38
   39 /* poll() is not supported in kernel <= 2.0, therefore if __NR_poll is
   40  * not available, we assume an old Linux kernel is in use and we will
   41  * use select() instead. */
  42 #include <sys/syscall.h>
  43 #ifndef __NR_poll
  44 # define USE_SELECT
  45 #endif
  46
  47 libpthread_hidden_proto(waitpid)
  48 libpthread_hidden_proto(raise)
  49
   50 /* Array of thread handles, indexed by stack segment number. Entry 0 is the initial thread, entry 1 the thread manager. */
   51 struct pthread_handle_struct __pthread_handles[PTHREAD_THREADS_MAX] =
   52 { { __LOCK_INITIALIZER, &__pthread_initial_thread, 0},
   53   { __LOCK_INITIALIZER, &__pthread_manager_thread, 0}, /* Remaining entries are implicitly zero-initialized (all NULLs) */ };
   54
   55 /* For debugging purposes put the maximum number of threads in a variable.  */
   56 const int __linuxthreads_pthread_threads_max = PTHREAD_THREADS_MAX;
   57
   58 /* Indicate whether at least one thread has a user-defined stack (if 1),
   59    or if all threads have stacks supplied by LinuxThreads (if 0).  Also set to 1 whenever a stack is malloc()ed on non-MMU builds. */
   60 int __pthread_nonstandard_stacks;
   61
   62 /* Number of active entries in __pthread_handles (used by gdb); starts at 2 for the two statically initialized entries */
   63 volatile int __pthread_handles_num = 2;
   64
   65 /* Whether to use debugger additional actions for thread creation
   66    (set to 1 by gdb) */
   67 volatile int __pthread_threads_debug;
   68
   69 /* Globally enabled events; OR-ed with the per-thread event mask when deciding whether to report an event.  */
   70 volatile td_thr_events_t __pthread_threads_events;
   71
   72 /* Pointer to thread descriptor with last event; written just before an event is signalled.  */
   73 volatile pthread_descr __pthread_last_event;
  74
  75 /* Mapping from stack segment to thread descriptor. */
  76 /* Stack segment numbers are also indices into the __pthread_handles array. */
  77 /* Stack segment number 0 is reserved for the initial thread. */
  78
  79 static __inline__ pthread_descr thread_segment(int seg)
  80 {
  81   return (pthread_descr)(THREAD_STACK_START_ADDRESS - (seg - 1) * STACK_SIZE)
  82          - 1;
  83 }
  84
   85 /* Flag set in signal handler to record child termination; the manager loop tests it, clears it, then reaps children */
   86
   87 static volatile int terminated_children = 0;
   88
   89 /* Flag set when the initial thread is blocked on pthread_exit waiting
   90    for all other threads to terminate (set by the manager on REQ_MAIN_THREAD_EXIT) */
   91
   92 static int main_thread_exiting = 0;
   93
   94 /* Counter used to generate unique thread identifier; advanced by PTHREAD_THREADS_MAX for each created thread.
   95    Thread identifier is pthread_threads_counter + segment. */
   96
   97 static pthread_t pthread_threads_counter = 0;
  98
  99 /* Forward declarations */
 100
 101 static int pthread_handle_create(pthread_t *thread, const pthread_attr_t *attr,
 102                                  void * (*start_routine)(void *), void *arg,
 103                                  sigset_t *mask, int father_pid,
 104                                  int report_events,
 105                                  td_thr_events_t *event_maskp);
 106 static void pthread_handle_free(pthread_t th_id);
 107 static void pthread_handle_exit(pthread_descr issuing_thread, int exitcode) attribute_noreturn;
 108 static void pthread_reap_children(void);
 109 static void pthread_kill_all_threads(int sig, int main_thread_also);
 110
 111 /* The server thread managing requests for thread creation and termination */
 112
 113 int attribute_noreturn __pthread_manager(void *arg)
 114 {
 115   int reqfd = (int) (long int) arg;
 116 #ifdef USE_SELECT
 117   struct timeval tv;
 118   fd_set fd;
 119 #else
 120   struct pollfd ufd;
 121 #endif
 122   sigset_t manager_mask;
 123   int n;
 124   struct pthread_request request;
 125
 126   /* If we have special thread_self processing, initialize it.  */
 127 #ifdef INIT_THREAD_SELF
 128   INIT_THREAD_SELF(&__pthread_manager_thread, 1);
 129 #endif
 130   /* Set the error variable.  */
 131   __pthread_manager_thread.p_errnop = &__pthread_manager_thread.p_errno;
 132   __pthread_manager_thread.p_h_errnop = &__pthread_manager_thread.p_h_errno;
 133
 134 #ifdef __UCLIBC_HAS_XLOCALE__
 135   /* Initialize thread's locale to the global locale. */
 136   __pthread_manager_thread.locale = __global_locale;
 137 #endif /* __UCLIBC_HAS_XLOCALE__ */
 138
 139   /* Block all signals except __pthread_sig_cancel and SIGTRAP */
 140   sigfillset(&manager_mask);
 141   sigdelset(&manager_mask, __pthread_sig_cancel); /* for thread termination */
 142   sigdelset(&manager_mask, SIGTRAP);            /* for debugging purposes */
 143   if (__pthread_threads_debug && __pthread_sig_debug > 0)
 144       sigdelset(&manager_mask, __pthread_sig_debug);
 145   sigprocmask(SIG_SETMASK, &manager_mask, NULL);
 146   /* Raise our priority to match that of main thread */
 147   __pthread_manager_adjust_prio(__pthread_main_thread->p_priority);
 148   /* Synchronize debugging of the thread manager */
 149   n = TEMP_FAILURE_RETRY(__libc_read(reqfd, (char *)&request,
 150                                      sizeof(request)));
 151 #ifndef USE_SELECT
 152   ufd.fd = reqfd;
 153   ufd.events = POLLIN;
 154 #endif
 155   /* Enter server loop */
 156   while(1) {
 157 #ifdef USE_SELECT
 158     tv.tv_sec = 2;
 159     tv.tv_usec = 0;
 160     FD_ZERO (&fd);
 161     FD_SET (reqfd, &fd);
 162     n = select (reqfd + 1, &fd, NULL, NULL, &tv);
 163 #else
 164     PDEBUG("before poll\n");
 165     n = poll(&ufd, 1, 2000);
 166     PDEBUG("after poll\n");
 167 #endif
 168     /* Check for termination of the main thread */
 169     if (getppid() == 1) {
 170       pthread_kill_all_threads(SIGKILL, 0);
 171       _exit(0);
 172     }
 173     /* Check for dead children */
 174     if (terminated_children) {
 175       terminated_children = 0;
 176       pthread_reap_children();
 177     }
 178     /* Read and execute request */
 179 #ifdef USE_SELECT
 180     if (n == 1)
 181 #else
 182     if (n == 1 && (ufd.revents & POLLIN))
 183 #endif
 184     {
 185
 186       PDEBUG("before __libc_read\n");
 187       n = __libc_read(reqfd, (char *)&request, sizeof(request));
 188       PDEBUG("after __libc_read, n=%d\n", n);
 189       switch(request.req_kind) {
 190       case REQ_CREATE:
 191         PDEBUG("got REQ_CREATE\n");
 192         request.req_thread->p_retcode =
 193           pthread_handle_create((pthread_t *) &request.req_thread->p_retval,
 194                                 request.req_args.create.attr,
 195                                 request.req_args.create.fn,
 196                                 request.req_args.create.arg,
 197                                 &request.req_args.create.mask,
 198                                 request.req_thread->p_pid,
 199                                 request.req_thread->p_report_events,
 200                                 &request.req_thread->p_eventbuf.eventmask);
 201         PDEBUG("restarting %d\n", request.req_thread);
 202         restart(request.req_thread);
 203         break;
 204       case REQ_FREE:
 205         PDEBUG("got REQ_FREE\n");
 206         pthread_handle_free(request.req_args.free.thread_id);
 207         break;
 208       case REQ_PROCESS_EXIT:
 209         PDEBUG("got REQ_PROCESS_EXIT from %d, exit code = %d\n",
 210         request.req_thread, request.req_args.exit.code);
 211         pthread_handle_exit(request.req_thread,
 212                             request.req_args.exit.code);
 213         break;
 214       case REQ_MAIN_THREAD_EXIT:
 215         PDEBUG("got REQ_MAIN_THREAD_EXIT\n");
 216         main_thread_exiting = 1;
 217         /* Reap children in case all other threads died and the signal handler
 218            went off before we set main_thread_exiting to 1, and therefore did
 219            not do REQ_KICK. */
 220         pthread_reap_children();
 221
 222         if (__pthread_main_thread->p_nextlive == __pthread_main_thread) {
 223           restart(__pthread_main_thread);
 224           /* The main thread will now call exit() which will trigger an
 225              __on_exit handler, which in turn will send REQ_PROCESS_EXIT
 226              to the thread manager. In case you are wondering how the
 227              manager terminates from its loop here. */
 228         }
 229         break;
 230       case REQ_POST:
 231         PDEBUG("got REQ_POST\n");
 232         __new_sem_post(request.req_args.post);
 233         break;
 234       case REQ_DEBUG:
 235         PDEBUG("got REQ_DEBUG\n");
 236         /* Make gdb aware of new thread and gdb will restart the
 237            new thread when it is ready to handle the new thread. */
 238         if (__pthread_threads_debug && __pthread_sig_debug > 0) {
 239       PDEBUG("about to call raise(__pthread_sig_debug)\n");
 240           raise(__pthread_sig_debug);
 241         }
 242       case REQ_KICK:
 243         /* This is just a prod to get the manager to reap some
 244            threads right away, avoiding a potential delay at shutdown. */
 245         break;
 246       }
 247     }
 248   }
 249 }
 250
 251 int __pthread_manager_event(void *arg)
 252 {
 253   /* If we have special thread_self processing, initialize it.  */
 254 #ifdef INIT_THREAD_SELF
 255   INIT_THREAD_SELF(&__pthread_manager_thread, 1);
 256 #endif
 257
 258   /* Get the lock the manager will free once all is correctly set up.  */
 259   __pthread_lock (THREAD_GETMEM((&__pthread_manager_thread), p_lock), NULL);
 260   /* Free it immediately.  */
 261   __pthread_unlock (THREAD_GETMEM((&__pthread_manager_thread), p_lock));
 262
 263   return __pthread_manager(arg);
 264 }
 265
 266 /* Process creation */
 267 static int
 268 attribute_noreturn
 269 pthread_start_thread(void *arg)
 270 {
 271   pthread_descr self = (pthread_descr) arg;
 272   struct pthread_request request;
 273   void * outcome;
 274   /* Initialize special thread_self processing, if any.  */
 275 #ifdef INIT_THREAD_SELF
 276   INIT_THREAD_SELF(self, self->p_nr);
 277 #endif
 278   PDEBUG("\n");
 279   /* Make sure our pid field is initialized, just in case we get there
 280      before our father has initialized it. */
 281   THREAD_SETMEM(self, p_pid, getpid());
 282   /* Initial signal mask is that of the creating thread. (Otherwise,
 283      we'd just inherit the mask of the thread manager.) */
 284   sigprocmask(SIG_SETMASK, &self->p_start_args.mask, NULL);
 285   /* Set the scheduling policy and priority for the new thread, if needed */
 286   if (THREAD_GETMEM(self, p_start_args.schedpolicy) >= 0)
 287     /* Explicit scheduling attributes were provided: apply them */
 288     sched_setscheduler(THREAD_GETMEM(self, p_pid),
 289                          THREAD_GETMEM(self, p_start_args.schedpolicy),
 290                          &self->p_start_args.schedparam);
 291   else if (__pthread_manager_thread.p_priority > 0)
 292     /* Default scheduling required, but thread manager runs in realtime
 293        scheduling: switch new thread to SCHED_OTHER policy */
 294     {
 295       struct sched_param default_params;
 296       default_params.sched_priority = 0;
 297       sched_setscheduler(THREAD_GETMEM(self, p_pid),
 298                            SCHED_OTHER, &default_params);
 299     }
 300   /* Make gdb aware of new thread */
 301   if (__pthread_threads_debug && __pthread_sig_debug > 0) {
 302     request.req_thread = self;
 303     request.req_kind = REQ_DEBUG;
 304     TEMP_FAILURE_RETRY(__libc_write(__pthread_manager_request,
 305                 (char *) &request, sizeof(request)));
 306     suspend(self);
 307   }
 308   /* Run the thread code */
 309   outcome = self->p_start_args.start_routine(THREAD_GETMEM(self,
 310                                                            p_start_args.arg));
 311   /* Exit with the given return value */
 312   __pthread_do_exit(outcome, CURRENT_STACK_FRAME);
 313 }
 314
 315 static int
 316 attribute_noreturn
 317 pthread_start_thread_event(void *arg)
 318 {
 319   pthread_descr self = (pthread_descr) arg;
 320
 321 #ifdef INIT_THREAD_SELF
 322   INIT_THREAD_SELF(self, self->p_nr);
 323 #endif
 324   /* Make sure our pid field is initialized, just in case we get there
 325      before our father has initialized it. */
 326   THREAD_SETMEM(self, p_pid, getpid());
 327   /* Get the lock the manager will free once all is correctly set up.  */
 328   __pthread_lock (THREAD_GETMEM(self, p_lock), NULL);
 329   /* Free it immediately.  */
 330   __pthread_unlock (THREAD_GETMEM(self, p_lock));
 331
 332   /* Continue with the real function.  */
 333   pthread_start_thread (arg);
 334 }
 335
 336 static int pthread_allocate_stack(const pthread_attr_t *attr,
 337                                   pthread_descr default_new_thread,
 338                                   int pagesize,
 339                                   pthread_descr * out_new_thread,
 340                                   char ** out_new_thread_bottom,
 341                                   char ** out_guardaddr,
 342                                   size_t * out_guardsize)
 343 {
 344   pthread_descr new_thread;
 345   char * new_thread_bottom;
 346   char * guardaddr;
 347   size_t stacksize, guardsize;
 348
 349   if (attr != NULL && attr->__stackaddr_set)
 350     {
 351       /* The user provided a stack. */
 352       new_thread =
 353         (pthread_descr) ((long)(attr->__stackaddr) & -sizeof(void *)) - 1;
 354       new_thread_bottom = (char *) attr->__stackaddr - attr->__stacksize;
 355       guardaddr = NULL;
 356       guardsize = 0;
 357       __pthread_nonstandard_stacks = 1;
 358 #ifndef __ARCH_USE_MMU__
 359       /* check the initial thread stack boundaries so they don't overlap */
 360       NOMMU_INITIAL_THREAD_BOUNDS((char *) new_thread, (char *) new_thread_bottom);
 361
 362       PDEBUG("initial stack: bos=%p, tos=%p\n", __pthread_initial_thread_bos,
 363             __pthread_initial_thread_tos);
 364 #endif
 365     }
 366   else
 367     {
 368 #ifdef __ARCH_USE_MMU__
 369       stacksize = STACK_SIZE - pagesize;
 370       if (attr != NULL)
 371         stacksize = MIN (stacksize, roundup(attr->__stacksize, pagesize));
 372       /* Allocate space for stack and thread descriptor at default address */
 373       new_thread = default_new_thread;
 374       new_thread_bottom = (char *) (new_thread + 1) - stacksize;
 375       if (mmap((caddr_t)((char *)(new_thread + 1) - INITIAL_STACK_SIZE),
 376                INITIAL_STACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC,
 377                MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_GROWSDOWN,
 378                -1, 0) == MAP_FAILED)
 379         /* Bad luck, this segment is already mapped. */
 380         return -1;
 381       /* We manage to get a stack.  Now see whether we need a guard
 382          and allocate it if necessary.  Notice that the default
 383          attributes (stack_size = STACK_SIZE - pagesize) do not need
 384          a guard page, since the RLIMIT_STACK soft limit prevents stacks
 385          from running into one another. */
 386       if (stacksize == (size_t) (STACK_SIZE - pagesize))
 387         {
 388           /* We don't need a guard page. */
 389           guardaddr = NULL;
 390           guardsize = 0;
 391         }
 392       else
 393         {
 394           /* Put a bad page at the bottom of the stack */
 395           guardsize = attr->__guardsize;
 396           guardaddr = (void *)new_thread_bottom - guardsize;
 397           if (mmap ((caddr_t) guardaddr, guardsize, 0, MAP_FIXED, -1, 0)
 398               == MAP_FAILED)
 399             {
 400               /* We don't make this an error.  */
 401               guardaddr = NULL;
 402               guardsize = 0;
 403             }
 404         }
 405 #else
 406       /* We cannot mmap to this huge chunk of stack space when we don't have
 407        * an MMU. Pretend we are using a user provided stack even if there was
 408        * none provided by the user. Thus, we get around the mmap and reservation
 409        * of a huge stack segment. -StS */
 410
 411       stacksize = INITIAL_STACK_SIZE;
 412       /* The user may want to use a non-default stacksize */
 413       if (attr != NULL)
 414         {
 415           stacksize = attr->__stacksize;
 416         }
 417
 418       /* malloc a stack - memory from the bottom up */
 419       if ((new_thread_bottom = malloc(stacksize)) == NULL)
 420         {
 421           /* bad luck, we cannot malloc any more */
 422           return -1 ;
 423         }
 424       PDEBUG("malloced chunk: base=%p, size=0x%04x\n", new_thread_bottom, stacksize);
 425
 426       /* Set up the pointers. new_thread marks the TOP of the stack frame and
 427        * the address of the pthread_descr struct at the same time. Therefore we
 428        * must account for its size and fit it in the malloc()'ed block. The
 429        * value of `new_thread' is then passed to clone() as the stack argument.
 430        *
 431        *               ^ +------------------------+
 432        *               | |  pthread_descr struct  |
 433        *               | +------------------------+  <- new_thread
 434        * malloc block  | |                        |
 435        *               | |  thread stack          |
 436        *               | |                        |
 437        *               v +------------------------+  <- new_thread_bottom
 438        *
 439        * Note: The calculated value of new_thread must be word aligned otherwise
 440        * the kernel chokes on a non-aligned stack frame. Choose the lower
 441        * available word boundary.
 442        */
 443       new_thread = ((pthread_descr) ((int)(new_thread_bottom + stacksize) & -sizeof(void*))) - 1;
 444       guardaddr = NULL;
 445       guardsize = 0;
 446
 447       PDEBUG("thread stack: bos=%p, tos=%p\n", new_thread_bottom, new_thread);
 448
 449       /* check the initial thread stack boundaries so they don't overlap */
 450       NOMMU_INITIAL_THREAD_BOUNDS((char *) new_thread, (char *) new_thread_bottom);
 451
 452       PDEBUG("initial stack: bos=%p, tos=%p\n", __pthread_initial_thread_bos,
 453              __pthread_initial_thread_tos);
 454
 455       /* on non-MMU systems we always have non-standard stack frames */
 456       __pthread_nonstandard_stacks = 1;
 457
 458 #endif /* __ARCH_USE_MMU__ */
 459     }
 460
 461   /* Clear the thread data structure.  */
 462   memset (new_thread, '\0', sizeof (*new_thread));
 463   *out_new_thread = new_thread;
 464   *out_new_thread_bottom = new_thread_bottom;
 465   *out_guardaddr = guardaddr;
 466   *out_guardsize = guardsize;
 467   return 0;
 468 }
 469
 470 static int pthread_handle_create(pthread_t *thread, const pthread_attr_t *attr,
 471                                  void * (*start_routine)(void *), void *arg,
 472                                  sigset_t * mask, int father_pid,
 473                                  int report_events,
 474                                  td_thr_events_t *event_maskp)
 475 {
 476   size_t sseg;
 477   int pid;
 478   pthread_descr new_thread;
 479   char * new_thread_bottom;
 480   pthread_t new_thread_id;
 481   char *guardaddr = NULL;
 482   size_t guardsize = 0;
 483   int pagesize = getpagesize();
 484   int saved_errno = 0;
 485
 486   /* First check whether we have to change the policy and if yes, whether
 487      we can  do this.  Normally this should be done by examining the
 488      return value of the sched_setscheduler call in pthread_start_thread
 489      but this is hard to implement.  FIXME  */
 490   if (attr != NULL && attr->__schedpolicy != SCHED_OTHER && geteuid () != 0)
 491     return EPERM;
 492   /* Find a free segment for the thread, and allocate a stack if needed */
 493   for (sseg = 2; ; sseg++)
 494     {
 495       if (sseg >= PTHREAD_THREADS_MAX)
 496         return EAGAIN;
 497       if (__pthread_handles[sseg].h_descr != NULL)
 498         continue;
 499       if (pthread_allocate_stack(attr, thread_segment(sseg), pagesize,
 500                                  &new_thread, &new_thread_bottom,
 501                                  &guardaddr, &guardsize) == 0)
 502         break;
 503 #ifndef __ARCH_USE_MMU__
 504       else
 505         /* When there is MMU, mmap () is used to allocate the stack. If one
 506          * segment is already mapped, we should continue to see if we can
 507          * use the next one. However, when there is no MMU, malloc () is used.
 508          * It's waste of CPU cycles to continue to try if it fails.  */
 509         return EAGAIN;
 510 #endif
 511     }
 512   __pthread_handles_num++;
 513   /* Allocate new thread identifier */
 514   pthread_threads_counter += PTHREAD_THREADS_MAX;
 515   new_thread_id = sseg + pthread_threads_counter;
 516   /* Initialize the thread descriptor.  Elements which have to be
 517      initialized to zero already have this value.  */
 518   new_thread->p_tid = new_thread_id;
 519   new_thread->p_lock = &(__pthread_handles[sseg].h_lock);
 520   new_thread->p_cancelstate = PTHREAD_CANCEL_ENABLE;
 521   new_thread->p_canceltype = PTHREAD_CANCEL_DEFERRED;
 522   new_thread->p_errnop = &new_thread->p_errno;
 523   new_thread->p_h_errnop = &new_thread->p_h_errno;
 524 #ifdef __UCLIBC_HAS_XLOCALE__
 525   /* Initialize thread's locale to the global locale. */
 526   new_thread->locale = __global_locale;
 527 #endif /* __UCLIBC_HAS_XLOCALE__ */
 528   new_thread->p_guardaddr = guardaddr;
 529   new_thread->p_guardsize = guardsize;
 530   new_thread->p_self = new_thread;
 531   new_thread->p_nr = sseg;
 532   /* Initialize the thread handle */
 533   __pthread_init_lock(&__pthread_handles[sseg].h_lock);
 534   __pthread_handles[sseg].h_descr = new_thread;
 535   __pthread_handles[sseg].h_bottom = new_thread_bottom;
 536   /* Determine scheduling parameters for the thread */
 537   new_thread->p_start_args.schedpolicy = -1;
 538   if (attr != NULL) {
 539     new_thread->p_detached = attr->__detachstate;
 540     new_thread->p_userstack = attr->__stackaddr_set;
 541
 542     switch(attr->__inheritsched) {
 543     case PTHREAD_EXPLICIT_SCHED:
 544       new_thread->p_start_args.schedpolicy = attr->__schedpolicy;
 545       memcpy (&new_thread->p_start_args.schedparam, &attr->__schedparam,
 546               sizeof (struct sched_param));
 547       break;
 548     case PTHREAD_INHERIT_SCHED:
 549       new_thread->p_start_args.schedpolicy = sched_getscheduler(father_pid);
 550       sched_getparam(father_pid, &new_thread->p_start_args.schedparam);
 551       break;
 552     }
 553     new_thread->p_priority =
 554       new_thread->p_start_args.schedparam.sched_priority;
 555   }
 556   /* Finish setting up arguments to pthread_start_thread */
 557   new_thread->p_start_args.start_routine = start_routine;
 558   new_thread->p_start_args.arg = arg;
 559   new_thread->p_start_args.mask = *mask;
 560   /* Raise priority of thread manager if needed */
 561   __pthread_manager_adjust_prio(new_thread->p_priority);
 562   /* Do the cloning.  We have to use two different functions depending
 563      on whether we are debugging or not.  */
 564   pid = 0;     /* Note that the thread never can have PID zero.  */
 565
 566
 567   /* ******************************************************** */
 568   /*  This code was moved from below to cope with running threads
 569    *  on uClinux systems.  See comment below...
 570    * Insert new thread in doubly linked list of active threads */
 571   new_thread->p_prevlive = __pthread_main_thread;
 572   new_thread->p_nextlive = __pthread_main_thread->p_nextlive;
 573   __pthread_main_thread->p_nextlive->p_prevlive = new_thread;
 574   __pthread_main_thread->p_nextlive = new_thread;
 575   /* ********************************************************* */
 576
 577   if (report_events)
 578     {
 579       /* See whether the TD_CREATE event bit is set in any of the
 580          masks.  */
 581       int idx = __td_eventword (TD_CREATE);
 582       uint32_t mask = __td_eventmask (TD_CREATE);
 583
 584       if ((mask & (__pthread_threads_events.event_bits[idx]
 585                    | event_maskp->event_bits[idx])) != 0)
 586         {
 587           /* Lock the mutex the child will use now so that it will stop.  */
 588           __pthread_lock(new_thread->p_lock, NULL);
 589
 590           /* We have to report this event.  */
 591 #ifdef __ia64__
 592           pid = __clone2(pthread_start_thread_event, (void **) new_thread,
 593                         (char *)new_thread - new_thread_bottom,
 594                         CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
 595                         __pthread_sig_cancel, new_thread);
 596 #else
 597           pid = clone(pthread_start_thread_event, (void **) new_thread,
 598                         CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
 599                         __pthread_sig_cancel, new_thread);
 600 #endif
 601
 602           saved_errno = errno;
 603           if (pid != -1)
 604             {
 605               /* Now fill in the information about the new thread in
 606                  the newly created thread's data structure.  We cannot let
 607                  the new thread do this since we don't know whether it was
 608                  already scheduled when we send the event.  */
 609               new_thread->p_eventbuf.eventdata = new_thread;
 610               new_thread->p_eventbuf.eventnum = TD_CREATE;
 611               __pthread_last_event = new_thread;
 612
 613               /* We have to set the PID here since the callback function
 614                  in the debug library will need it and we cannot guarantee
 615                  the child got scheduled before the debugger.  */
 616               new_thread->p_pid = pid;
 617
 618               /* Now call the function which signals the event.  */
 619               __linuxthreads_create_event ();
 620
 621               /* Now restart the thread.  */
 622               __pthread_unlock(new_thread->p_lock);
 623             }
 624         }
 625     }
 626   if (pid == 0)
 627     {
 628       PDEBUG("cloning new_thread = %p\n", new_thread);
 629 #ifdef __ia64__
 630       pid = __clone2(pthread_start_thread, (void **) new_thread,
 631                         (char *)new_thread - new_thread_bottom,
 632                     CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
 633                     __pthread_sig_cancel, new_thread);
 634 #else
 635       pid = clone(pthread_start_thread, (void **) new_thread,
 636                     CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
 637                     __pthread_sig_cancel, new_thread);
 638 #endif
 639       saved_errno = errno;
 640     }
 641   /* Check if cloning succeeded */
 642   if (pid == -1) {
 643     /********************************************************
 644      * Code inserted to remove the thread from our list of active
 645      * threads in case of failure (needed to cope with uClinux),
 646      * See comment below. */
 647     new_thread->p_nextlive->p_prevlive = new_thread->p_prevlive;
 648     new_thread->p_prevlive->p_nextlive = new_thread->p_nextlive;
 649     /********************************************************/
 650
 651     /* Free the stack if we allocated it */
 652     if (attr == NULL || !attr->__stackaddr_set)
 653       {
 654 #ifdef __ARCH_USE_MMU__
 655         if (new_thread->p_guardsize != 0)
 656           munmap(new_thread->p_guardaddr, new_thread->p_guardsize);
 657         munmap((caddr_t)((char *)(new_thread+1) - INITIAL_STACK_SIZE),
 658                INITIAL_STACK_SIZE);
 659 #else
 660         free(new_thread_bottom);
 661 #endif /* __ARCH_USE_MMU__ */
 662       }
 663     __pthread_handles[sseg].h_descr = NULL;
 664     __pthread_handles[sseg].h_bottom = NULL;
 665     __pthread_handles_num--;
 666     return errno;
 667   }
 668   PDEBUG("new thread pid = %d\n", pid);
 669
 670 #if 0
 671   /* ***********************************************************
 672    This code has been moved before the call to clone().  In uClinux,
 673    the use of wait on a semaphore is dependant upon that the child so
 674    the child must be in the active threads list. This list is used in
 675    pthread_find_self() to get the pthread_descr of self. So, if the
 676    child calls sem_wait before this code is executed , it will hang
 677    forever and initial_thread will instead be posted by a sem_post
 678    call. */
 679
 680   /* Insert new thread in doubly linked list of active threads */
 681   new_thread->p_prevlive = __pthread_main_thread;
 682   new_thread->p_nextlive = __pthread_main_thread->p_nextlive;
 683   __pthread_main_thread->p_nextlive->p_prevlive = new_thread;
 684   __pthread_main_thread->p_nextlive = new_thread;
 685   /************************************************************/
 686 #endif
 687
 688   /* Set pid field of the new thread, in case we get there before the
 689      child starts. */
 690   new_thread->p_pid = pid;
 691   /* We're all set */
 692   *thread = new_thread_id;
 693   return 0;
 694 }
 695
 696
 697 /* Try to free the resources of a thread when requested by pthread_join
 698    or pthread_detach on a terminated thread. */
 699
 700 static void pthread_free(pthread_descr th)
 701 {
 702   pthread_handle handle;
 703   pthread_readlock_info *iter, *next;
 704   char *h_bottom_save;
 705
 706   /* Make the handle invalid */
 707   handle =  thread_handle(th->p_tid);
 708   __pthread_lock(&handle->h_lock, NULL);
 709   h_bottom_save = handle->h_bottom;
 710   handle->h_descr = NULL;
 711   handle->h_bottom = (char *)(-1L);
 712   __pthread_unlock(&handle->h_lock);
 713 #ifdef FREE_THREAD_SELF
 714   FREE_THREAD_SELF(th, th->p_nr);
 715 #endif
 716   /* One fewer threads in __pthread_handles */
 717   __pthread_handles_num--;
 718
 719   /* Destroy read lock list, and list of free read lock structures.
 720      If the former is not empty, it means the thread exited while
 721      holding read locks! */
 722
 723   for (iter = th->p_readlock_list; iter != NULL; iter = next)
 724     {
 725       next = iter->pr_next;
 726       free(iter);
 727     }
 728
 729   for (iter = th->p_readlock_free; iter != NULL; iter = next)
 730     {
 731       next = iter->pr_next;
 732       free(iter);
 733     }
 734
 735   /* If initial thread, nothing to free */
 736   if (th == &__pthread_initial_thread) return;
 737 #ifdef __ARCH_USE_MMU__
 738   if (!th->p_userstack)
 739     {
 740       /* Free the stack and thread descriptor area */
 741       if (th->p_guardsize != 0)
 742         munmap(th->p_guardaddr, th->p_guardsize);
 743       munmap((caddr_t) ((char *)(th+1) - STACK_SIZE), STACK_SIZE);
 744     }
 745 #else
 746   /* For non-MMU systems we always malloc the stack, so free it here. -StS */
 747   if (!th->p_userstack) {
 748       free(h_bottom_save);
 749   }
 750 #endif /* __ARCH_USE_MMU__ */
 751 }
 752
 753 /* Handle threads that have exited */
 754
 755 static void pthread_exited(pid_t pid)
 756 {
 757   pthread_descr th;
 758   int detached;
 759   /* Find thread with that pid */
 760   for (th = __pthread_main_thread->p_nextlive;
 761        th != __pthread_main_thread;
 762        th = th->p_nextlive) {
 763     if (th->p_pid == pid) {
 764       /* Remove thread from list of active threads */
 765       th->p_nextlive->p_prevlive = th->p_prevlive;
 766       th->p_prevlive->p_nextlive = th->p_nextlive;
 767       /* Mark thread as exited, and if detached, free its resources */
 768       __pthread_lock(th->p_lock, NULL);
 769       th->p_exited = 1;
 770       /* If we have to signal this event do it now.  */
 771       if (th->p_report_events)
 772         {
 773           /* See whether TD_REAP is in any of the mask.  */
 774           int idx = __td_eventword (TD_REAP);
 775           uint32_t mask = __td_eventmask (TD_REAP);
 776
 777           if ((mask & (__pthread_threads_events.event_bits[idx]
 778                        | th->p_eventbuf.eventmask.event_bits[idx])) != 0)
 779             {
 780               /* Yep, we have to signal the reapage.  */
 781               th->p_eventbuf.eventnum = TD_REAP;
 782               th->p_eventbuf.eventdata = th;
 783               __pthread_last_event = th;
 784
 785               /* Now call the function to signal the event.  */
 786               __linuxthreads_reap_event();
 787             }
 788         }
 789       detached = th->p_detached;
 790       __pthread_unlock(th->p_lock);
 791       if (detached)
 792         pthread_free(th);
 793       break;
 794     }
 795   }
 796   /* If all threads have exited and the main thread is pending on a
 797      pthread_exit, wake up the main thread and terminate ourselves. */
 798   if (main_thread_exiting &&
 799       __pthread_main_thread->p_nextlive == __pthread_main_thread) {
 800     restart(__pthread_main_thread);
 801     /* Same logic as REQ_MAIN_THREAD_EXIT. */
 802   }
 803 }
 804
 805 static void pthread_reap_children(void)
 806 {
 807   pid_t pid;
 808   int status;
 809   PDEBUG("\n");
 810
 811   while ((pid = __libc_waitpid(-1, &status, WNOHANG | __WCLONE)) > 0) {
 812     pthread_exited(pid);
 813     if (WIFSIGNALED(status)) {
 814       /* If a thread died due to a signal, send the same signal to
 815          all other threads, including the main thread. */
 816       pthread_kill_all_threads(WTERMSIG(status), 1);
 817       _exit(0);
 818     }
 819   }
 820 }
 821
 822 /* Try to free the resources of a thread when requested by pthread_join
 823    or pthread_detach on a terminated thread. */
 824
 825 static void pthread_handle_free(pthread_t th_id)
 826 {
 827   pthread_handle handle = thread_handle(th_id);
 828   pthread_descr th;
 829
 830   __pthread_lock(&handle->h_lock, NULL);
 831   if (invalid_handle(handle, th_id)) {
 832     /* pthread_reap_children has deallocated the thread already,
 833        nothing needs to be done */
 834     __pthread_unlock(&handle->h_lock);
 835     return;
 836   }
 837   th = handle->h_descr;
 838   if (th->p_exited) {
 839     __pthread_unlock(&handle->h_lock);
 840     pthread_free(th);
 841   } else {
 842     /* The Unix process of the thread is still running.
 843        Mark the thread as detached so that the thread manager will
 844        deallocate its resources when the Unix process exits. */
 845     th->p_detached = 1;
 846     __pthread_unlock(&handle->h_lock);
 847   }
 848 }
 849
 850 /* Send a signal to all running threads */
 851
 852 static void pthread_kill_all_threads(int sig, int main_thread_also)
 853 {
 854   pthread_descr th;
 855   for (th = __pthread_main_thread->p_nextlive;
 856        th != __pthread_main_thread;
 857        th = th->p_nextlive) {
 858     kill(th->p_pid, sig);
 859   }
 860   if (main_thread_also) {
 861     kill(__pthread_main_thread->p_pid, sig);
 862   }
 863 }
 864
 865 /* Process-wide exit() */
 866
 867 static void pthread_handle_exit(pthread_descr issuing_thread, int exitcode)
 868 {
 869   pthread_descr th;
 870   __pthread_exit_requested = 1;
 871   __pthread_exit_code = exitcode;
 872   /* Send the CANCEL signal to all running threads, including the main
 873      thread, but excluding the thread from which the exit request originated
 874      (that thread must complete the exit, e.g. calling atexit functions
 875      and flushing stdio buffers). */
 876   for (th = issuing_thread->p_nextlive;
 877        th != issuing_thread;
 878        th = th->p_nextlive) {
 879     kill(th->p_pid, __pthread_sig_cancel);
 880   }
 881   /* Now, wait for all these threads, so that they don't become zombies
 882      and their times are properly added to the thread manager's times. */
 883   for (th = issuing_thread->p_nextlive;
 884        th != issuing_thread;
 885        th = th->p_nextlive) {
 886     waitpid(th->p_pid, NULL, __WCLONE);
 887   }
 888   restart(issuing_thread);
 889   _exit(0);
 890 }
 891
 892 /* Handler for __pthread_sig_cancel in thread manager thread */
 893
 894 void __pthread_manager_sighandler(int sig attribute_unused)
 895 {
 896     int kick_manager = terminated_children == 0 && main_thread_exiting;
 897     terminated_children = 1;
 898
 899     /* If the main thread is terminating, kick the thread manager loop
 900        each time some threads terminate. This eliminates a two second
 901        shutdown delay caused by the thread manager sleeping in the
 902        call to __poll(). Instead, the thread manager is kicked into
 903        action, reaps the outstanding threads and resumes the main thread
 904        so that it can complete the shutdown. */
 905
 906     if (kick_manager) {
 907         struct pthread_request request;
 908         request.req_thread = 0;
 909         request.req_kind = REQ_KICK;
 910         TEMP_FAILURE_RETRY(__libc_write(__pthread_manager_request,
 911                     (char *) &request, sizeof(request)));
 912     }
 913 }
 914
 915 /* Adjust priority of thread manager so that it always run at a priority
 916    higher than all threads */
 917
 918 void __pthread_manager_adjust_prio(int thread_prio)
 919 {
 920   struct sched_param param;
 921
 922   if (thread_prio <= __pthread_manager_thread.p_priority) return;
 923   param.sched_priority =
 924     thread_prio < sched_get_priority_max(SCHED_FIFO)
 925     ? thread_prio + 1 : thread_prio;
 926   sched_setscheduler(__pthread_manager_thread.p_pid, SCHED_FIFO, &param);
 927   __pthread_manager_thread.p_priority = thread_prio;
 928 }