libpthread/linuxthreads.old/manager.c

   1 /* Linuxthreads - a simple clone()-based implementation of Posix        */
   2 /* threads for Linux.                                                   */
   3 /* Copyright (C) 1996 Xavier Leroy (Xavier.Leroy@inria.fr)              */
   4 /*                                                                      */
   5 /* This program is free software; you can redistribute it and/or        */
   6 /* modify it under the terms of the GNU Library General Public License  */
   7 /* as published by the Free Software Foundation; either version 2       */
   8 /* of the License, or (at your option) any later version.               */
   9 /*                                                                      */
  10 /* This program is distributed in the hope that it will be useful,      */
  11 /* but WITHOUT ANY WARRANTY; without even the implied warranty of       */
  12 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        */
  13 /* GNU Library General Public License for more details.                 */
  14
  15 /* The "thread manager" thread: manages creation and termination of threads */
  16
  17 #include <features.h>
  18 #include <errno.h>
  19 #include <sched.h>
  20 #include <stddef.h>
  21 #include <stdio.h>
  22 #include <stdlib.h>
  23 #include <string.h>
  24 #include <unistd.h>
  25 #include <sys/poll.h>           /* for poll */
  26 #include <sys/mman.h>           /* for mmap */
  27 #include <sys/param.h>
  28 #include <sys/time.h>
  29 #include <sys/wait.h>           /* for waitpid macros */
  30
  31 #include "pthread.h"
  32 #include "internals.h"
  33 #include "spinlock.h"
  34 #include "restart.h"
  35 #include "semaphore.h"
  36 #include "debug.h" /* PDEBUG, added by StS */
  37
  38
  39 /* poll() is not supported in kernels <= 2.0; therefore, if __NR_poll is
  40  * not available, we assume an old Linux kernel is in use and we will
  41  * use select() instead. */
  42 #include <sys/syscall.h>
  43 #ifndef __NR_poll
  44 # define USE_SELECT
  45 #endif
  46
  47 libpthread_hidden_proto(waitpid)
  48 libpthread_hidden_proto(raise)
  49
  50 /* Array of thread handles. Entry 0 is the initial thread and entry 1 is the thread manager. */
  51 struct pthread_handle_struct __pthread_handles[PTHREAD_THREADS_MAX] =
  52 { { __LOCK_INITIALIZER, &__pthread_initial_thread, 0},
  53   { __LOCK_INITIALIZER, &__pthread_manager_thread, 0}, /* remaining entries: all NULLs */ };
  54
  55 /* For debugging purposes put the maximum number of threads in a variable.  */
  56 const int __linuxthreads_pthread_threads_max = PTHREAD_THREADS_MAX;
  57
  58 /* Indicate whether at least one thread has a user-defined stack (if 1),
  59    or if all threads have stacks supplied by LinuxThreads (if 0). */
  60 int __pthread_nonstandard_stacks;
  61
  62 /* Number of active entries in __pthread_handles (used by gdb); starts at 2 for the two entries initialized above */
  63 volatile int __pthread_handles_num = 2;
  64
  65 /* Whether to use debugger additional actions for thread creation
  66    (set to 1 by gdb) */
  67 volatile int __pthread_threads_debug;
  68
  69 /* Globally enabled events; OR-ed with the per-thread event mask when deciding whether to report an event.  */
  70 volatile td_thr_events_t __pthread_threads_events;
  71
  72 /* Pointer to thread descriptor with last event.  */
  73 volatile pthread_descr __pthread_last_event;
  74
  75 /* Mapping from stack segment to thread descriptor. */
  76 /* Stack segment numbers are also indices into the __pthread_handles array. */
  77 /* Stack segment number 0 is reserved for the initial thread. */
  78
  79 static __inline__ pthread_descr thread_segment(int seg)
  80 {
  81   return (pthread_descr)(THREAD_STACK_START_ADDRESS - (seg - 1) * STACK_SIZE)
  82          - 1;
  83 }
  84
  85 /* Flag set in signal handler to record child termination; the manager loop clears it and then reaps children */
  86
  87 static volatile int terminated_children = 0;
  88
  89 /* Flag set when the initial thread is blocked on pthread_exit waiting
  90    for all other threads to terminate (set to 1 when REQ_MAIN_THREAD_EXIT is handled) */
  91
  92 static int main_thread_exiting = 0;
  93
  94 /* Counter used to generate unique thread identifier.
  95    Thread identifier is pthread_threads_counter + segment; the counter advances by PTHREAD_THREADS_MAX per created thread. */
  96
  97 static pthread_t pthread_threads_counter = 0;
  98
  99 /* Forward declarations of the helpers called from the manager loop */
 100
 101 static int pthread_handle_create(pthread_t *thread, const pthread_attr_t *attr,
 102                                  void * (*start_routine)(void *), void *arg,
 103                                  sigset_t *mask, int father_pid,
 104                                  int report_events,
 105                                  td_thr_events_t *event_maskp);
 106 static void pthread_handle_free(pthread_t th_id);
 107 static void pthread_handle_exit(pthread_descr issuing_thread, int exitcode) attribute_noreturn;
 108 static void pthread_reap_children(void);
 109 static void pthread_kill_all_threads(int sig, int main_thread_also);
 110
 111 /* The server thread managing requests for thread creation and termination */
 112
 113 int attribute_noreturn __pthread_manager(void *arg)
 114 {
 115   int reqfd = (int) (long int) arg;
 116 #ifdef USE_SELECT
 117   struct timeval tv;
 118   fd_set fd;
 119 #else
 120   struct pollfd ufd;
 121 #endif
 122   sigset_t manager_mask;
 123   int n;
 124   struct pthread_request request;
 125
 126   /* If we have special thread_self processing, initialize it.  */
 127 #ifdef INIT_THREAD_SELF
 128   INIT_THREAD_SELF(&__pthread_manager_thread, 1);
 129 #endif
 130   /* Set the error variable.  */
 131   __pthread_manager_thread.p_errnop = &__pthread_manager_thread.p_errno;
 132   __pthread_manager_thread.p_h_errnop = &__pthread_manager_thread.p_h_errno;
 133
 134 #ifdef __UCLIBC_HAS_XLOCALE__
 135   /* Initialize thread's locale to the global locale. */
 136   __pthread_manager_thread.locale = __global_locale;
 137 #endif /* __UCLIBC_HAS_XLOCALE__ */
 138
 139   /* Block all signals except __pthread_sig_cancel and SIGTRAP */
 140   __sigfillset(&manager_mask);
 141   sigdelset(&manager_mask, __pthread_sig_cancel); /* for thread termination */
 142   sigdelset(&manager_mask, SIGTRAP);            /* for debugging purposes */
 143   if (__pthread_threads_debug && __pthread_sig_debug > 0)
 144       sigdelset(&manager_mask, __pthread_sig_debug);
 145   sigprocmask(SIG_SETMASK, &manager_mask, NULL);
 146   /* Raise our priority to match that of main thread */
 147   __pthread_manager_adjust_prio(__pthread_main_thread->p_priority);
 148   /* Synchronize debugging of the thread manager */
 149   n = TEMP_FAILURE_RETRY(read(reqfd, (char *)&request,
 150                                      sizeof(request)));
 151 #ifndef USE_SELECT
 152   ufd.fd = reqfd;
 153   ufd.events = POLLIN;
 154 #endif
 155   /* Enter server loop */
 156   while(1) {
 157 #ifdef USE_SELECT
 158     tv.tv_sec = 2;
 159     tv.tv_usec = 0;
 160     FD_ZERO (&fd);
 161     FD_SET (reqfd, &fd);
 162     n = select (reqfd + 1, &fd, NULL, NULL, &tv);
 163 #else
 164     PDEBUG("before poll\n");
 165     n = poll(&ufd, 1, 2000);
 166     PDEBUG("after poll\n");
 167 #endif
 168     /* Check for termination of the main thread */
 169     if (getppid() == 1) {
 170       pthread_kill_all_threads(SIGKILL, 0);
 171       _exit(0);
 172     }
 173     /* Check for dead children */
 174     if (terminated_children) {
 175       terminated_children = 0;
 176       pthread_reap_children();
 177     }
 178     /* Read and execute request */
 179 #ifdef USE_SELECT
 180     if (n == 1)
 181 #else
 182     if (n == 1 && (ufd.revents & POLLIN))
 183 #endif
 184     {
 185
 186       PDEBUG("before read\n");
 187       n = read(reqfd, (char *)&request, sizeof(request));
 188       PDEBUG("after read, n=%d\n", n);
 189       switch(request.req_kind) {
 190       case REQ_CREATE:
 191         PDEBUG("got REQ_CREATE\n");
 192         request.req_thread->p_retcode =
 193           pthread_handle_create((pthread_t *) &request.req_thread->p_retval,
 194                                 request.req_args.create.attr,
 195                                 request.req_args.create.fn,
 196                                 request.req_args.create.arg,
 197                                 &request.req_args.create.mask,
 198                                 request.req_thread->p_pid,
 199                                 request.req_thread->p_report_events,
 200                                 &request.req_thread->p_eventbuf.eventmask);
 201         PDEBUG("restarting %d\n", request.req_thread);
 202         restart(request.req_thread);
 203         break;
 204       case REQ_FREE:
 205         PDEBUG("got REQ_FREE\n");
 206         pthread_handle_free(request.req_args.free.thread_id);
 207         break;
 208       case REQ_PROCESS_EXIT:
 209         PDEBUG("got REQ_PROCESS_EXIT from %d, exit code = %d\n",
 210         request.req_thread, request.req_args.exit.code);
 211         pthread_handle_exit(request.req_thread,
 212                             request.req_args.exit.code);
 213         break;
 214       case REQ_MAIN_THREAD_EXIT:
 215         PDEBUG("got REQ_MAIN_THREAD_EXIT\n");
 216         main_thread_exiting = 1;
 217         /* Reap children in case all other threads died and the signal handler
 218            went off before we set main_thread_exiting to 1, and therefore did
 219            not do REQ_KICK. */
 220         pthread_reap_children();
 221
 222         if (__pthread_main_thread->p_nextlive == __pthread_main_thread) {
 223           restart(__pthread_main_thread);
 224           /* The main thread will now call exit() which will trigger an
 225              __on_exit handler, which in turn will send REQ_PROCESS_EXIT
 226              to the thread manager. In case you are wondering how the
 227              manager terminates from its loop here. */
 228         }
 229         break;
 230       case REQ_POST:
 231         PDEBUG("got REQ_POST\n");
 232         __new_sem_post(request.req_args.post);
 233         break;
 234       case REQ_DEBUG:
 235         PDEBUG("got REQ_DEBUG\n");
 236         /* Make gdb aware of new thread and gdb will restart the
 237            new thread when it is ready to handle the new thread. */
 238         if (__pthread_threads_debug && __pthread_sig_debug > 0) {
 239           PDEBUG("about to call raise(__pthread_sig_debug)\n");
 240           raise(__pthread_sig_debug);
 241         }
 242       case REQ_KICK:
 243         /* This is just a prod to get the manager to reap some
 244            threads right away, avoiding a potential delay at shutdown. */
 245         break;
 246       }
 247     }
 248   }
 249 }
 250
 251 int __pthread_manager_event(void *arg)
 252 {
 253   /* If we have special thread_self processing, initialize it.  */
 254 #ifdef INIT_THREAD_SELF
 255   INIT_THREAD_SELF(&__pthread_manager_thread, 1);
 256 #endif
 257
 258   /* Get the lock the manager will free once all is correctly set up.  */
 259   __pthread_lock (THREAD_GETMEM((&__pthread_manager_thread), p_lock), NULL);
 260   /* Free it immediately.  */
 261   __pthread_unlock (THREAD_GETMEM((&__pthread_manager_thread), p_lock));
 262
 263   return __pthread_manager(arg);
 264 }
 265
 266 /* Process creation */
 267 static int
 268 attribute_noreturn
 269 pthread_start_thread(void *arg)
 270 {
 271   pthread_descr self = (pthread_descr) arg;
 272   struct pthread_request request;
 273   void * outcome;
 274   /* Initialize special thread_self processing, if any.  */
 275 #ifdef INIT_THREAD_SELF
 276   INIT_THREAD_SELF(self, self->p_nr);
 277 #endif
 278   PDEBUG("\n");
 279   /* Make sure our pid field is initialized, just in case we get there
 280      before our father has initialized it. */
 281   THREAD_SETMEM(self, p_pid, getpid());
 282   /* Initial signal mask is that of the creating thread. (Otherwise,
 283      we'd just inherit the mask of the thread manager.) */
 284   sigprocmask(SIG_SETMASK, &self->p_start_args.mask, NULL);
 285   /* Set the scheduling policy and priority for the new thread, if needed */
 286   if (THREAD_GETMEM(self, p_start_args.schedpolicy) >= 0)
 287     /* Explicit scheduling attributes were provided: apply them */
 288     sched_setscheduler(THREAD_GETMEM(self, p_pid),
 289                          THREAD_GETMEM(self, p_start_args.schedpolicy),
 290                          &self->p_start_args.schedparam);
 291   else if (__pthread_manager_thread.p_priority > 0)
 292     /* Default scheduling required, but thread manager runs in realtime
 293        scheduling: switch new thread to SCHED_OTHER policy */
 294     {
 295       struct sched_param default_params;
 296       default_params.sched_priority = 0;
 297       sched_setscheduler(THREAD_GETMEM(self, p_pid),
 298                            SCHED_OTHER, &default_params);
 299     }
 300   /* Make gdb aware of new thread */
 301   if (__pthread_threads_debug && __pthread_sig_debug > 0) {
 302     request.req_thread = self;
 303     request.req_kind = REQ_DEBUG;
 304     TEMP_FAILURE_RETRY(write(__pthread_manager_request,
 305                 (char *) &request, sizeof(request)));
 306     suspend(self);
 307   }
 308   /* Run the thread code */
 309   outcome = self->p_start_args.start_routine(THREAD_GETMEM(self,
 310                                                            p_start_args.arg));
 311   /* Exit with the given return value */
 312   __pthread_do_exit(outcome, CURRENT_STACK_FRAME);
 313 }
 314
 315 static int
 316 attribute_noreturn
 317 pthread_start_thread_event(void *arg)
 318 {
 319   pthread_descr self = (pthread_descr) arg;
 320
 321 #ifdef INIT_THREAD_SELF
 322   INIT_THREAD_SELF(self, self->p_nr);
 323 #endif
 324   /* Make sure our pid field is initialized, just in case we get there
 325      before our father has initialized it. */
 326   THREAD_SETMEM(self, p_pid, getpid());
 327   /* Get the lock the manager will free once all is correctly set up.  */
 328   __pthread_lock (THREAD_GETMEM(self, p_lock), NULL);
 329   /* Free it immediately.  */
 330   __pthread_unlock (THREAD_GETMEM(self, p_lock));
 331
 332   /* Continue with the real function.  */
 333   pthread_start_thread (arg);
 334 }
 335
 336 static int pthread_allocate_stack(const pthread_attr_t *attr,
 337                                   pthread_descr default_new_thread,
 338                                   int pagesize,
 339                                   pthread_descr * out_new_thread,
 340                                   char ** out_new_thread_bottom,
 341                                   char ** out_guardaddr,
 342                                   size_t * out_guardsize)
 343 {
 344   pthread_descr new_thread;
 345   char * new_thread_bottom;
 346   char * guardaddr;
 347   size_t stacksize, guardsize;
 348
 349   if (attr != NULL && attr->__stackaddr_set)
 350     {
 351       /* The user provided a stack. */
 352       new_thread = (pthread_descr) ((long)(attr->__stackaddr) & -sizeof(void *)) - 1;
 353       new_thread_bottom = (char *) attr->__stackaddr - attr->__stacksize;
 354       guardaddr = NULL;
 355       guardsize = 0;
 356       __pthread_nonstandard_stacks = 1;
 357 #ifndef __ARCH_USE_MMU__
 358       /* check the initial thread stack boundaries so they don't overlap */
 359       NOMMU_INITIAL_THREAD_BOUNDS((char *) new_thread, (char *) new_thread_bottom);
 360
 361       PDEBUG("initial stack: bos=%p, tos=%p\n", __pthread_initial_thread_bos,
 362             __pthread_initial_thread_tos);
 363 #endif
 364     }
 365   else
 366     {
 367 #ifdef __ARCH_USE_MMU__
 368       stacksize = STACK_SIZE - pagesize;
 369       if (attr != NULL)
 370         stacksize = MIN(stacksize, roundup(attr->__stacksize, pagesize));
 371       /* Allocate space for stack and thread descriptor at default address */
 372       new_thread = default_new_thread;
 373       new_thread_bottom = (char *) (new_thread + 1) - stacksize;
 374       if (mmap((caddr_t)((char *)(new_thread + 1) - INITIAL_STACK_SIZE),
 375                INITIAL_STACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC,
 376                MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_GROWSDOWN,
 377                -1, 0) == MAP_FAILED)
 378         /* Bad luck, this segment is already mapped. */
 379         return -1;
 380       /* We manage to get a stack.  Now see whether we need a guard
 381          and allocate it if necessary.  Notice that the default
 382          attributes (stack_size = STACK_SIZE - pagesize) do not need
 383          a guard page, since the RLIMIT_STACK soft limit prevents stacks
 384          from running into one another. */
 385       if (stacksize == (size_t) (STACK_SIZE - pagesize))
 386         {
 387           /* We don't need a guard page. */
 388           guardaddr = NULL;
 389           guardsize = 0;
 390         }
 391       else
 392         {
 393           /* Put a bad page at the bottom of the stack */
 394           guardsize = attr->__guardsize;
 395           guardaddr = (void *)new_thread_bottom - guardsize;
 396           if (mmap((caddr_t) guardaddr, guardsize, 0, MAP_FIXED, -1, 0)
 397               == MAP_FAILED)
 398             {
 399               /* We don't make this an error.  */
 400               guardaddr = NULL;
 401               guardsize = 0;
 402             }
 403         }
 404 #else
 405       /* We cannot mmap to this huge chunk of stack space when we don't have
 406        * an MMU. Pretend we are using a user provided stack even if there was
 407        * none provided by the user. Thus, we get around the mmap and reservation
 408        * of a huge stack segment. -StS */
 409
 410       stacksize = INITIAL_STACK_SIZE;
 411       /* The user may want to use a non-default stacksize */
 412       if (attr != NULL)
 413         {
 414           stacksize = attr->__stacksize;
 415         }
 416
 417       /* malloc a stack - memory from the bottom up */
 418       if ((new_thread_bottom = malloc(stacksize)) == NULL)
 419         {
 420           /* bad luck, we cannot malloc any more */
 421           return -1 ;
 422         }
 423       PDEBUG("malloced chunk: base=%p, size=0x%04x\n", new_thread_bottom, stacksize);
 424
 425       /* Set up the pointers. new_thread marks the TOP of the stack frame and
 426        * the address of the pthread_descr struct at the same time. Therefore we
 427        * must account for its size and fit it in the malloc()'ed block. The
 428        * value of `new_thread' is then passed to clone() as the stack argument.
 429        *
 430        *               ^ +------------------------+
 431        *               | |  pthread_descr struct  |
 432        *               | +------------------------+  <- new_thread
 433        * malloc block  | |                        |
 434        *               | |  thread stack          |
 435        *               | |                        |
 436        *               v +------------------------+  <- new_thread_bottom
 437        *
 438        * Note: The calculated value of new_thread must be word aligned otherwise
 439        * the kernel chokes on a non-aligned stack frame. Choose the lower
 440        * available word boundary.
 441        */
 442       new_thread = ((pthread_descr) ((int)(new_thread_bottom + stacksize) & -sizeof(void*))) - 1;
 443       guardaddr = NULL;
 444       guardsize = 0;
 445
 446       PDEBUG("thread stack: bos=%p, tos=%p\n", new_thread_bottom, new_thread);
 447
 448       /* check the initial thread stack boundaries so they don't overlap */
 449       NOMMU_INITIAL_THREAD_BOUNDS((char *) new_thread, (char *) new_thread_bottom);
 450
 451       PDEBUG("initial stack: bos=%p, tos=%p\n", __pthread_initial_thread_bos,
 452              __pthread_initial_thread_tos);
 453
 454       /* on non-MMU systems we always have non-standard stack frames */
 455       __pthread_nonstandard_stacks = 1;
 456
 457 #endif /* __ARCH_USE_MMU__ */
 458     }
 459
 460   /* Clear the thread data structure.  */
 461   memset (new_thread, '\0', sizeof (*new_thread));
 462   *out_new_thread = new_thread;
 463   *out_new_thread_bottom = new_thread_bottom;
 464   *out_guardaddr = guardaddr;
 465   *out_guardsize = guardsize;
 466   return 0;
 467 }
 468
 469 static int pthread_handle_create(pthread_t *thread, const pthread_attr_t *attr,
 470                                  void * (*start_routine)(void *), void *arg,
 471                                  sigset_t * mask, int father_pid,
 472                                  int report_events,
 473                                  td_thr_events_t *event_maskp)
 474 {
 475   size_t sseg;
 476   int pid;
 477   pthread_descr new_thread;
 478   char * new_thread_bottom;
 479   pthread_t new_thread_id;
 480   char *guardaddr = NULL;
 481   size_t guardsize = 0;
 482   int pagesize = getpagesize();
 483   int saved_errno = 0;
 484
 485   /* First check whether we have to change the policy and if yes, whether
 486      we can  do this.  Normally this should be done by examining the
 487      return value of the sched_setscheduler call in pthread_start_thread
 488      but this is hard to implement.  FIXME  */
 489   if (attr != NULL && attr->__schedpolicy != SCHED_OTHER && geteuid () != 0)
 490     return EPERM;
 491   /* Find a free segment for the thread, and allocate a stack if needed */
 492   for (sseg = 2; ; sseg++)
 493     {
 494       if (sseg >= PTHREAD_THREADS_MAX)
 495         return EAGAIN;
 496       if (__pthread_handles[sseg].h_descr != NULL)
 497         continue;
 498       if (pthread_allocate_stack(attr, thread_segment(sseg), pagesize,
 499                                  &new_thread, &new_thread_bottom,
 500                                  &guardaddr, &guardsize) == 0)
 501         break;
 502 #ifndef __ARCH_USE_MMU__
 503       else
 504         /* When there is MMU, mmap () is used to allocate the stack. If one
 505          * segment is already mapped, we should continue to see if we can
 506          * use the next one. However, when there is no MMU, malloc () is used.
 507          * It's waste of CPU cycles to continue to try if it fails.  */
 508         return EAGAIN;
 509 #endif
 510     }
 511   __pthread_handles_num++;
 512   /* Allocate new thread identifier */
 513   pthread_threads_counter += PTHREAD_THREADS_MAX;
 514   new_thread_id = sseg + pthread_threads_counter;
 515   /* Initialize the thread descriptor.  Elements which have to be
 516      initialized to zero already have this value.  */
 517   new_thread->p_tid = new_thread_id;
 518   new_thread->p_lock = &(__pthread_handles[sseg].h_lock);
 519   new_thread->p_cancelstate = PTHREAD_CANCEL_ENABLE;
 520   new_thread->p_canceltype = PTHREAD_CANCEL_DEFERRED;
 521   new_thread->p_errnop = &new_thread->p_errno;
 522   new_thread->p_h_errnop = &new_thread->p_h_errno;
 523 #ifdef __UCLIBC_HAS_XLOCALE__
 524   /* Initialize thread's locale to the global locale. */
 525   new_thread->locale = __global_locale;
 526 #endif /* __UCLIBC_HAS_XLOCALE__ */
 527   new_thread->p_guardaddr = guardaddr;
 528   new_thread->p_guardsize = guardsize;
 529   new_thread->p_self = new_thread;
 530   new_thread->p_nr = sseg;
 531   /* Initialize the thread handle */
 532   __pthread_init_lock(&__pthread_handles[sseg].h_lock);
 533   __pthread_handles[sseg].h_descr = new_thread;
 534   __pthread_handles[sseg].h_bottom = new_thread_bottom;
 535   /* Determine scheduling parameters for the thread */
 536   new_thread->p_start_args.schedpolicy = -1;
 537   if (attr != NULL) {
 538     new_thread->p_detached = attr->__detachstate;
 539     new_thread->p_userstack = attr->__stackaddr_set;
 540
 541     switch(attr->__inheritsched) {
 542     case PTHREAD_EXPLICIT_SCHED:
 543       new_thread->p_start_args.schedpolicy = attr->__schedpolicy;
 544       memcpy (&new_thread->p_start_args.schedparam, &attr->__schedparam,
 545               sizeof (struct sched_param));
 546       break;
 547     case PTHREAD_INHERIT_SCHED:
 548       new_thread->p_start_args.schedpolicy = sched_getscheduler(father_pid);
 549       sched_getparam(father_pid, &new_thread->p_start_args.schedparam);
 550       break;
 551     }
 552     new_thread->p_priority =
 553       new_thread->p_start_args.schedparam.sched_priority;
 554   }
 555   /* Finish setting up arguments to pthread_start_thread */
 556   new_thread->p_start_args.start_routine = start_routine;
 557   new_thread->p_start_args.arg = arg;
 558   new_thread->p_start_args.mask = *mask;
 559   /* Raise priority of thread manager if needed */
 560   __pthread_manager_adjust_prio(new_thread->p_priority);
 561   /* Do the cloning.  We have to use two different functions depending
 562      on whether we are debugging or not.  */
 563   pid = 0;     /* Note that the thread never can have PID zero.  */
 564
 565
 566   /* ******************************************************** */
 567   /*  This code was moved from below to cope with running threads
 568    *  on uClinux systems.  See comment below...
 569    * Insert new thread in doubly linked list of active threads */
 570   new_thread->p_prevlive = __pthread_main_thread;
 571   new_thread->p_nextlive = __pthread_main_thread->p_nextlive;
 572   __pthread_main_thread->p_nextlive->p_prevlive = new_thread;
 573   __pthread_main_thread->p_nextlive = new_thread;
 574   /* ********************************************************* */
 575
 576   if (report_events)
 577     {
 578       /* See whether the TD_CREATE event bit is set in any of the
 579          masks.  */
 580       int idx = __td_eventword (TD_CREATE);
 581       uint32_t mask = __td_eventmask (TD_CREATE);
 582
 583       if ((mask & (__pthread_threads_events.event_bits[idx]
 584                    | event_maskp->event_bits[idx])) != 0)
 585         {
 586           /* Lock the mutex the child will use now so that it will stop.  */
 587           __pthread_lock(new_thread->p_lock, NULL);
 588
 589           /* We have to report this event.  */
 590 #ifdef __ia64__
 591           pid = __clone2(pthread_start_thread_event, (void **) new_thread,
 592                         (char *)new_thread - new_thread_bottom,
 593                         CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
 594                         __pthread_sig_cancel, new_thread);
 595 #else
 596           pid = clone(pthread_start_thread_event, (void **) new_thread,
 597                         CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
 598                         __pthread_sig_cancel, new_thread);
 599 #endif
 600
 601           saved_errno = errno;
 602           if (pid != -1)
 603             {
 604               /* Now fill in the information about the new thread in
 605                  the newly created thread's data structure.  We cannot let
 606                  the new thread do this since we don't know whether it was
 607                  already scheduled when we send the event.  */
 608               new_thread->p_eventbuf.eventdata = new_thread;
 609               new_thread->p_eventbuf.eventnum = TD_CREATE;
 610               __pthread_last_event = new_thread;
 611
 612               /* We have to set the PID here since the callback function
 613                  in the debug library will need it and we cannot guarantee
 614                  the child got scheduled before the debugger.  */
 615               new_thread->p_pid = pid;
 616
 617               /* Now call the function which signals the event.  */
 618               __linuxthreads_create_event ();
 619
 620               /* Now restart the thread.  */
 621               __pthread_unlock(new_thread->p_lock);
 622             }
 623         }
 624     }
 625   if (pid == 0)
 626     {
 627       PDEBUG("cloning new_thread = %p\n", new_thread);
 628 #ifdef __ia64__
 629       pid = __clone2(pthread_start_thread, (void **) new_thread,
 630                         (char *)new_thread - new_thread_bottom,
 631                     CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
 632                     __pthread_sig_cancel, new_thread);
 633 #else
 634       pid = clone(pthread_start_thread, (void **) new_thread,
 635                     CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
 636                     __pthread_sig_cancel, new_thread);
 637 #endif
 638       saved_errno = errno;
 639     }
 640   /* Check if cloning succeeded */
 641   if (pid == -1) {
 642     /********************************************************
 643      * Code inserted to remove the thread from our list of active
 644      * threads in case of failure (needed to cope with uClinux),
 645      * See comment below. */
 646     new_thread->p_nextlive->p_prevlive = new_thread->p_prevlive;
 647     new_thread->p_prevlive->p_nextlive = new_thread->p_nextlive;
 648     /********************************************************/
 649
 650     /* Free the stack if we allocated it */
 651     if (attr == NULL || !attr->__stackaddr_set)
 652       {
 653 #ifdef __ARCH_USE_MMU__
 654         if (new_thread->p_guardsize != 0)
 655           munmap(new_thread->p_guardaddr, new_thread->p_guardsize);
 656         munmap((caddr_t)((char *)(new_thread+1) - INITIAL_STACK_SIZE),
 657                INITIAL_STACK_SIZE);
 658 #else
 659         free(new_thread_bottom);
 660 #endif /* __ARCH_USE_MMU__ */
 661       }
 662     __pthread_handles[sseg].h_descr = NULL;
 663     __pthread_handles[sseg].h_bottom = NULL;
 664     __pthread_handles_num--;
 665     return errno;
 666   }
 667   PDEBUG("new thread pid = %d\n", pid);
 668
 669 #if 0
 670   /* ***********************************************************
 671    This code has been moved before the call to clone().  In uClinux,
 672    the use of wait on a semaphore is dependant upon that the child so
 673    the child must be in the active threads list. This list is used in
 674    pthread_find_self() to get the pthread_descr of self. So, if the
 675    child calls sem_wait before this code is executed , it will hang
 676    forever and initial_thread will instead be posted by a sem_post
 677    call. */
 678
 679   /* Insert new thread in doubly linked list of active threads */
 680   new_thread->p_prevlive = __pthread_main_thread;
 681   new_thread->p_nextlive = __pthread_main_thread->p_nextlive;
 682   __pthread_main_thread->p_nextlive->p_prevlive = new_thread;
 683   __pthread_main_thread->p_nextlive = new_thread;
 684   /************************************************************/
 685 #endif
 686
 687   /* Set pid field of the new thread, in case we get there before the
 688      child starts. */
 689   new_thread->p_pid = pid;
 690   /* We're all set */
 691   *thread = new_thread_id;
 692   return 0;
 693 }
 694
 695
 696 /* Try to free the resources of a thread when requested by pthread_join
 697    or pthread_detach on a terminated thread. */
 698
 699 static void pthread_free(pthread_descr th)
 700 {
 701   pthread_handle handle;
 702   pthread_readlock_info *iter, *next;
 703   char *h_bottom_save;
 704
 705   /* Make the handle invalid */
 706   handle =  thread_handle(th->p_tid);
 707   __pthread_lock(&handle->h_lock, NULL);
 708   h_bottom_save = handle->h_bottom;
 709   handle->h_descr = NULL;
 710   handle->h_bottom = (char *)(-1L);
 711   __pthread_unlock(&handle->h_lock);
 712 #ifdef FREE_THREAD_SELF
 713   FREE_THREAD_SELF(th, th->p_nr);
 714 #endif
 715   /* One fewer threads in __pthread_handles */
 716   __pthread_handles_num--;
 717
 718   /* Destroy read lock list, and list of free read lock structures.
 719      If the former is not empty, it means the thread exited while
 720      holding read locks! */
 721
 722   for (iter = th->p_readlock_list; iter != NULL; iter = next)
 723     {
 724       next = iter->pr_next;
 725       free(iter);
 726     }
 727
 728   for (iter = th->p_readlock_free; iter != NULL; iter = next)
 729     {
 730       next = iter->pr_next;
 731       free(iter);
 732     }
 733
 734   /* If initial thread, nothing to free */
 735   if (th == &__pthread_initial_thread) return;
 736 #ifdef __ARCH_USE_MMU__
 737   if (!th->p_userstack)
 738     {
 739       /* Free the stack and thread descriptor area */
 740       if (th->p_guardsize != 0)
 741         munmap(th->p_guardaddr, th->p_guardsize);
 742       munmap((caddr_t) ((char *)(th+1) - STACK_SIZE), STACK_SIZE);
 743     }
 744 #else
 745   /* For non-MMU systems we always malloc the stack, so free it here. -StS */
 746   if (!th->p_userstack) {
 747       free(h_bottom_save);
 748   }
 749 #endif /* __ARCH_USE_MMU__ */
 750 }
 751
 752 /* Handle threads that have exited */
 753
 754 static void pthread_exited(pid_t pid)
 755 {
 756   pthread_descr th;
 757   int detached;
 758   /* Find thread with that pid */
 759   for (th = __pthread_main_thread->p_nextlive;
 760        th != __pthread_main_thread;
 761        th = th->p_nextlive) {
 762     if (th->p_pid == pid) {
 763       /* Remove thread from list of active threads */
 764       th->p_nextlive->p_prevlive = th->p_prevlive;
 765       th->p_prevlive->p_nextlive = th->p_nextlive;
 766       /* Mark thread as exited, and if detached, free its resources */
 767       __pthread_lock(th->p_lock, NULL);
 768       th->p_exited = 1;
 769       /* If we have to signal this event do it now.  */
 770       if (th->p_report_events)
 771         {
 772           /* See whether TD_REAP is in any of the mask.  */
 773           int idx = __td_eventword (TD_REAP);
 774           uint32_t mask = __td_eventmask (TD_REAP);
 775
 776           if ((mask & (__pthread_threads_events.event_bits[idx]
 777                        | th->p_eventbuf.eventmask.event_bits[idx])) != 0)
 778             {
 779               /* Yep, we have to signal the reapage.  */
 780               th->p_eventbuf.eventnum = TD_REAP;
 781               th->p_eventbuf.eventdata = th;
 782               __pthread_last_event = th;
 783
 784               /* Now call the function to signal the event.  */
 785               __linuxthreads_reap_event();
 786             }
 787         }
 788       detached = th->p_detached;
 789       __pthread_unlock(th->p_lock);
 790       if (detached)
 791         pthread_free(th);
 792       break;
 793     }
 794   }
 795   /* If all threads have exited and the main thread is pending on a
 796      pthread_exit, wake up the main thread and terminate ourselves. */
 797   if (main_thread_exiting &&
 798       __pthread_main_thread->p_nextlive == __pthread_main_thread) {
 799     restart(__pthread_main_thread);
 800     /* Same logic as REQ_MAIN_THREAD_EXIT. */
 801   }
 802 }
 803
 804 static void pthread_reap_children(void)
 805 {
 806   pid_t pid;
 807   int status;
 808   PDEBUG("\n");
 809
 810   while ((pid = waitpid(-1, &status, WNOHANG | __WCLONE)) > 0) {
 811     pthread_exited(pid);
 812     if (WIFSIGNALED(status)) {
 813       /* If a thread died due to a signal, send the same signal to
 814          all other threads, including the main thread. */
 815       pthread_kill_all_threads(WTERMSIG(status), 1);
 816       _exit(0);
 817     }
 818   }
 819 }
 820
 821 /* Try to free the resources of a thread when requested by pthread_join
 822    or pthread_detach on a terminated thread. */
 823
 824 static void pthread_handle_free(pthread_t th_id)
 825 {
 826   pthread_handle handle = thread_handle(th_id);
 827   pthread_descr th;
 828
 829   __pthread_lock(&handle->h_lock, NULL);
 830   if (invalid_handle(handle, th_id)) {
 831     /* pthread_reap_children has deallocated the thread already,
 832        nothing needs to be done */
 833     __pthread_unlock(&handle->h_lock);
 834     return;
 835   }
 836   th = handle->h_descr;
 837   if (th->p_exited) {
 838     __pthread_unlock(&handle->h_lock);
 839     pthread_free(th);
 840   } else {
 841     /* The Unix process of the thread is still running.
 842        Mark the thread as detached so that the thread manager will
 843        deallocate its resources when the Unix process exits. */
 844     th->p_detached = 1;
 845     __pthread_unlock(&handle->h_lock);
 846   }
 847 }
 848
 849 /* Send a signal to all running threads */
 850
 851 static void pthread_kill_all_threads(int sig, int main_thread_also)
 852 {
 853   pthread_descr th;
 854   for (th = __pthread_main_thread->p_nextlive;
 855        th != __pthread_main_thread;
 856        th = th->p_nextlive) {
 857     kill(th->p_pid, sig);
 858   }
 859   if (main_thread_also) {
 860     kill(__pthread_main_thread->p_pid, sig);
 861   }
 862 }
 863
 864 /* Process-wide exit() */
 865
 866 static void pthread_handle_exit(pthread_descr issuing_thread, int exitcode)
 867 {
 868   pthread_descr th;
 869   __pthread_exit_requested = 1;
 870   __pthread_exit_code = exitcode;
 871   /* Send the CANCEL signal to all running threads, including the main
 872      thread, but excluding the thread from which the exit request originated
 873      (that thread must complete the exit, e.g. calling atexit functions
 874      and flushing stdio buffers). */
 875   for (th = issuing_thread->p_nextlive;
 876        th != issuing_thread;
 877        th = th->p_nextlive) {
 878     kill(th->p_pid, __pthread_sig_cancel);
 879   }
 880   /* Now, wait for all these threads, so that they don't become zombies
 881      and their times are properly added to the thread manager's times. */
 882   for (th = issuing_thread->p_nextlive;
 883        th != issuing_thread;
 884        th = th->p_nextlive) {
 885     waitpid(th->p_pid, NULL, __WCLONE);
 886   }
 887   restart(issuing_thread);
 888   _exit(0);
 889 }
 890
 891 /* Handler for __pthread_sig_cancel in thread manager thread */
 892
 893 void __pthread_manager_sighandler(int sig attribute_unused)
 894 {
 895     int kick_manager = terminated_children == 0 && main_thread_exiting;
 896     terminated_children = 1;
 897
 898     /* If the main thread is terminating, kick the thread manager loop
 899        each time some threads terminate. This eliminates a two second
 900        shutdown delay caused by the thread manager sleeping in the
 901        call to __poll(). Instead, the thread manager is kicked into
 902        action, reaps the outstanding threads and resumes the main thread
 903        so that it can complete the shutdown. */
 904
 905     if (kick_manager) {
 906         struct pthread_request request;
 907         request.req_thread = 0;
 908         request.req_kind = REQ_KICK;
 909         TEMP_FAILURE_RETRY(write(__pthread_manager_request,
 910                     (char *) &request, sizeof(request)));
 911     }
 912 }
 913
 914 /* Adjust priority of thread manager so that it always run at a priority
 915    higher than all threads */
 916
 917 void __pthread_manager_adjust_prio(int thread_prio)
 918 {
 919   struct sched_param param;
 920
 921   if (thread_prio <= __pthread_manager_thread.p_priority) return;
 922   param.sched_priority =
 923     thread_prio < sched_get_priority_max(SCHED_FIFO)
 924     ? thread_prio + 1 : thread_prio;
 925   sched_setscheduler(__pthread_manager_thread.p_pid, SCHED_FIFO, &param);
 926   __pthread_manager_thread.p_priority = thread_prio;
 927 }