1 /* lwp-pool.c --- implementation of a stoppable, waitable LWP pool.
3 Copyright 2004 Red Hat, Inc.
5 This file is part of RDA, the Red Hat Debug Agent (and library).
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.
22 Alternative licenses for RDA may be arranged by contacting Red Hat, Inc.  */
27 #define _GNU_SOURCE /* for strerror */
34 #include <sys/types.h>
40 #include "diagnostics.h"
/* Non-zero => emit verbose LWP-pool tracing to stderr (checked before
   every diagnostic fprintf in this file, e.g. in
   debug_report_state_change and lwp_pool_stop_all).  */
42 int debug_lwp_pool = 0;
45 /* THE LIFETIME OF A TRACED LWP
47 POSIX uses these terms in talking about signals:
49 - To "generate" a signal is to call kill or raise, divide by zero,
52 - To "deliver" a signal is to do whatever that signal's designated
53 action is: ignore it, enter a signal handler, terminate the
54 process, or stop the process.
56 - To "accept" a signal is to have 'sigwait' or a similar function
57 select and return the signal.
59 - A signal is "pending" between the time it is generated and the
62 So, here is the life cycle of a traced LWP:
64 - It is created by fork or vfork and does a PTRACE_TRACEME. The
65 PTRACE_TRACEME makes it a traced, running LWP. When a traced LWP
66 does an exec, it gets a SIGTRAP before executing the first
67 instruction in the new process image, so the LWP will then stop.
69 Or, we attach to it with a PTRACE_ATTACH. This sends a SIGSTOP
70 to the LWP, so it will stop.
72 - While a traced LWP is stopped, we can read and write its
73 registers and memory. We can also send it signals; they become
74 pending on the LWP, and will be reported by waitpid.
76 - A stopped LWP can be set running again in one of two ways:
78 + by doing a PTRACE_CONT, PTRACE_SINGLESTEP, or PTRACE_SYSCALL; or
80 + by sending it a SIGCONT.
82 The ptrace requests all let you specify a signal to be delivered
83 to the process. This is the only way signals (other than
84 SIGKILL) ever get actually delivered: every other signal just
85 gets reported to the debugger via waitpid when delivery is
88 Sending a SIGCONT clears any pending SIGSTOPs; PTRACE_CONT and
89 PTRACE_SINGLESTEP don't have that side effect.
91 (Sending an LWP a SIGKILL via the 'kill' or 'tkill' system calls
92 acts like sending it a SIGKILL followed by a SIGCONT.)
94 - A running LWP may exit or be terminated by a signal at any time,
95 so accessing its memory or registers or sending it a signal is
98 - waitpid will eventually return a status S for a continued LWP:
100 + If WIFEXITED (S) or WIFSIGNALED (S), the LWP no longer exists.
102 + IF WIFSTOPPED (S), the LWP is stopped again, because some
103 signal WSTOPSIG (S) was about to be delivered to it. Here we
104 go back to the second step.
106 Note that the signal WSTOPSIG (S) has not yet been delivered to
107 the process, and is no longer pending on the process. Only
108 signals passed to the ptrace requests get delivered. In
109 effect, the debugger gets to intercept signals before they are
110 delivered, and decide whether to pass them through or not.
111 (The exception is SIGKILL: that always produces a WIFSIGNALED
112 wait status, and terminates the process.)
114 So, to put all that together:
116 - A traced LWP goes back and forth from running to stopped, until
117 eventually it goes from running to exited or killed.
119 - Running->stopped transitions are always attempted signal
120 deliveries, yielding WIFSTOPPED wait statuses.
122 - Stopping->running transitions are generally due to ptrace
123 requests by the debugger. (The debugger could use kill to send
124 SIGCONT, but that's messy.)
126 - Running->exited transitions are due to, duh, the LWP exiting.
128 - Running->killed transitions are due to a signal being delivered
129 to the LWP that is neither ignored nor caught.
132 Under NPTL, this life cycle is a bit different: LWPs simply exit,
133 without creating a zombie; they produce no wait status. The NPTL
134 libthread_db generates a TD_DEATH event for them, but at the kernel
135 level the only indication that they're gone is that the tkill
136 system call fails with ESRCH ("No such process").
138 Under LinuxThreads, LWPs remain zombie processes until they're
139 waited for. Attempts to send them signals while zombies have no
140 effect, but return no error.
145 The major challenge here is implementing the lwp_pool_stop_all
146 function. The only way to stop a running LWP is to send it a
147 SIGSTOP, and then wait for a status acknowledging the stop. But as
148 explained above, a running LWP could stop at any time of its own
149 accord, so sending it a SIGSTOP is always a race. By the time you
150 call waitpid, you don't know whether you'll get a status for the
151 SIGSTOP you just sent, or for something else: some other signal, an
152 exit, or a termination by signal.
154 If the LWP turns out to have exited or died, then that's pretty
155 easy to handle. Your attempt to send a SIGSTOP will get an error,
156 and then you'll get a wait status for the termination. A
157 termination status is always the last status you'll get from wait
158 for that LWP, so there'll be no further record of your SIGSTOP.
160 If the LWP was about to have some other signal delivered to it,
161 then the next wait will return a WIFSTOPPED status for that signal;
162 we'll have to continue the LWP and wait again until we get the
163 status for our SIGSTOP. The kernel forgets about any signals the
164 LWP has received once it has reported them to us, so it's up to us
165 to keep track of them and report them via lwp_pool_waitpid. */
169 /* The LWP structure. */
171 /* The states an LWP we're managing might be in.
173 For the purposes of these states, we classify wait statuses as
176 - An "interesting" wait status is one that isn't a result of us
177 attaching to the LWP or sending it a SIGSTOP for
178 lwp_pool_stop_all. It indicates something that happened to the
179 LWP other than as a result of this code's fiddling with it. We
180 report all interesting wait statuses via lwp_pool_waitpid.
182 - A "boring" wait status is one that results from our attaching to
183 it or sending it a SIGSTOP for lwp_pool_stop_all. We do not
184 report these via lwp_pool_waitpid.
186 Most of these states are combinations of various semi-independent
187 factors, which we'll name and define here:
189 - RUNNING / STOPPED / DEAD: These are the kernel states of the LWP:
190 it's either running freely and could stop at any moment, is
191 stopped but can be continued, or has died.
193 - INTERESTING: this LWP has stopped or died with a wait status that
194 has not yet been reported via lwp_pool_waitpid. It is on the
195 interesting LWP queue.
197 This never applies to RUNNING LWPs: we never continue an
198 INTERESTING LWP until we've reported its status.
200 It always applies to DEAD LWPs.
202 - STOP PENDING: we've sent this LWP a SIGSTOP, or attached to it,
203 but we haven't yet received the boring WIFSTOPPED SIGSTOP status.
205 This never applies to DEAD LWPs; the wait status that announces a
206 LWP's death is always the last for that LWP.
208 We could certainly represent these with independent bits or
209 bitfields, but not all combinations are possible. So instead, we
210 assign each possible combination a distinct enum value, to make it
211 easier to enumerate all the valid possibilities and be sure we've
216 /* An uninitialized LWP entry. Only the lookup function itself,
217 hash_find, creates entries in this state, and any function
218 that calls that should put the entry in a meaningful state before
220 lwp_state_uninitialized,
222 /* RUNNING. This LWP is running --- last we knew. It may have
223 exited or been terminated by a signal, or it may have had a
224 signal about to be delivered to it. We won't know until we wait
228 /* STOPPED. This LWP has stopped, and has no interesting status to
232 /* STOPPED, INTERESTING. This LWP has stopped with an interesting
233 wait status, which we haven't yet reported to the user. It is on
234 the interesting LWP queue. */
235 lwp_state_stopped_interesting,
237 /* DEAD, INTERESTING. This LWP exited, or was killed by a signal.
238 This LWP is on the interesting LWP queue. Once we've reported it
239 to the user, we'll delete it altogether. */
240 lwp_state_dead_interesting,
242 /* RUNNING, STOP PENDING. This LWP was running, and will eventually
243 stop with a boring WIFSTOPPED SIGSTOP status, but may report an
244 interesting status first.
246 It's always safe to wait for an LWP in this state, so we do that
247 as soon as possible; there shouldn't be any LWPs in this state
248 between calls to public lwp_pool functions. This is an
249 internal-use state. */
250 lwp_state_running_stop_pending,
252 /* STOPPED, STOP PENDING. This LWP is stopped, and has no
253 interesting status to report, but still has a boring status on
254 the way. After we report the status for a STOPPED, STOP PENDING,
255 and INTERESTING LWP, this is the state it enters.
257 See the note below on why this state is not avoidable. */
258 lwp_state_stopped_stop_pending,
260 /* STOPPED, STOP PENDING, and INTERESTING. This LWP has stopped with
261 an interesting wait status. We're also expecting a boring wait
263 lwp_state_stopped_stop_pending_interesting,
268 /* Why we need lwp_state_stopped_stop_pending:
270 I originally thought we could avoid having this state at all by
271 simply always continuing STOPPED, STOP PENDING, INTERESTING LWPs
272 in lwp_pool_waitpid as soon as we reported their wait status, and
273 then waiting for them immediately, making them either STOPPED and
274 un-INTERESTING, or STOPPED, STOP PENDING, and INTERESTING again.
276 But the user has the right to call lwp_pool_continue_lwp on any LWP
277 they've just gotten a wait status for --- and this simplification
278 interferes with that. First, note that you mustn't call
279 continue_lwp on an interesting LWP: you might get yet another
280 interesting wait status, and we don't want to queue up multiple
281 interesting wait statuses per LWP --- the job is complex enough
282 already. Then, note that the proposed simplification means that
283 lwp_pool_waitpid could return a status for some LWP, and have that
284 LWP still be interesting. If that happens, then you've got an LWP
285 the user has the right to continue, but that can't actually be
288 I first tried to deal with this by having lwp_pool_continue_lwp
289 simply do nothing if the user continues an interesting LWP. After
290 all, it's already in the interesting queue, so lwp_pool_waitpid
291 will report it, and the user will be none the wiser. But that's
292 wrong: the user can specify a signal to deliver when they continue
293 the LWP, and the only way signals are ever delivered to traced LWPs
294 is via ptrace continue and single-step requests. You can't use
295 kill: that *generates* a signal, it doesn't *deliver* it. You'd
296 just get the signal back again via waitpid. So if we don't
297 actually continue the LWP with the user's signal, we've lost our
298 only chance to deliver it.
300 Clear as mud, no doubt. I did my best. */
305 /* This lwp's PID. */
308 /* The state this LWP is in. */
309 enum lwp_state state;
311 /* If STATE is one of the lwp_state_*_interesting states, then this
312 LWP is on the interesting LWP queue, headed by interesting_queue.
314 If STATE is lwp_state_running_stop_pending, then this LWP is on
315 the stopping LWP queue, stopping_queue. (Note that
316 stopping_queue is local to lwp_pool_stop_all; no LWP should be in
317 that state by the time that function returns.) */
318 struct lwp *prev, *next;
320 /* If STATE is one of the lwp_state_*_interesting states, then
321 STATUS is the interesting wait status. */
324 /* Indicates the stepping status. We must be prepared to step the
325 given lwp upon continue since it's possible to get thread notification
326 signals prior to a step actually occurring. Receipt of a SIGTRAP is
327 sufficient to clear this flag. */
333 /* The LWP hash table. */
335 /* A hash table of all the live LWP's we know about.
336 hash_population is the number of occupied entries in the table.
338 hash_size is the total length of the table; it is always a power of
339 two. We resize the table to ensure that it is between 12.5% and
340 50% occupied. (Since the table's size is a power of two, resizing
341 the table will always halve or double the populated ratio. So
342 there should be comfortably more than a factor of two between the
343 maximum and minimum populations, for hysteresis.)
345 The first slot we try is hash[PID % hash_size]. After C
346 collisions, we try hash[(PID + C * STRIDE) % hash_size], where
347 STRIDE is hash_size / 4 + 1. The kernel assigns pids sequentially,
348 so a STRIDE of 1, as many hash tables use, would make further
349 collisions very likely. But since hash_size is always a power of
350 two, and hash_size / 4 + 1 is always odd, they are always
351 relatively prime, so stepping by that many elements each time will
352 eventually visit every table element. A constant odd stride would
353 be fine, but it's nice to have it scale with the overall population
356 The table is an array of pointers to lwp's, rather than a direct
357 array of lwp structures, so that pointers to lwp's don't become
358 invalid when we rehash or delete entries. */
/* hash_size is the total slot count (always a power of two, see the
   table-layout comment above); hash_population is the number of
   occupied slots.  hash is the slot array itself: an array of
   pointers, so lwp pointers survive rehashing.  */
359 static size_t hash_size, hash_population;
360 static struct lwp **hash;
362 /* The minimum size for the hash table. Small for testing. */
363 enum { minimum_hash_size = 8 };
/* Return the hash slot for pid PID in a table of SIZE slots.
   SIZE is always a power of two, so masking by SIZE - 1 is
   equivalent to PID % SIZE.  */
static int
hash_slot (pid_t pid, size_t size)
{
  return pid & (size - 1);
}
/* If there was a collision in SLOT, return the next slot to probe in a
   table of SIZE slots.  The stride, SIZE / 4 + 1, is always odd, and
   SIZE is a power of two, so the two are relatively prime: repeated
   probing eventually visits every slot.  */
static int
hash_next_slot (int slot, size_t size)
{
  int stride = size / 4 + 1;

  return (slot + stride) & (size - 1);
}
384 /* Return the earliest empty hash slot for PID. */
386 hash_empty_slot (pid_t pid)
388 int slot = hash_slot (pid, hash_size);
390 /* Since hash_next_slot will eventually visit every slot, and we
391 know the table isn't full, this loop will terminate. */
393 slot = hash_next_slot (slot, hash_size);
/* Return a new, zero-filled hash table of ELEMENTS slots.  This has
   no effect on the LWP pool's global variables.  Returns NULL on
   allocation failure.  */
static struct lwp **
make_hash_table (size_t elements)
{
  /* calloc both zero-initializes and checks ELEMENTS * size for
     overflow, unlike the equivalent malloc/memset pair.  */
  return calloc (elements, sizeof (struct lwp *));
}
414 /* Resize hash as needed to ensure that the table's population is
415 between 12.5% and 50% of its size. */
/* NOTE(review): this is the table-resizing routine described by the
   comment above; its name/signature line and several statements (the
   grow/shrink loop bodies, the early return when the size is
   unchanged, the per-slot occupancy guard, the free of the old table
   and the `hash = new_hash' assignment) appear to be missing from
   this copy -- recover them from version control before building.  */
419 struct lwp **new_hash;
420 size_t new_hash_size;
421 int new_hash_population; /* just for sanity checking */
424 /* Pick a new size. */
425 new_hash_size = hash_size;
426 while (new_hash_size < hash_population * 2)
428 while (new_hash_size > minimum_hash_size
429 && new_hash_size > hash_population * 8)
432 /* We may have re-chosen the minimum table size. */
433 if (new_hash_size == hash_size)
436 new_hash = make_hash_table (new_hash_size);
437 new_hash_population = 0;
439 /* Re-insert all the old lwp's in the new table. */
440 for (i = 0; i < hash_size; i++)
443 struct lwp *l = hash[i];
444 int new_slot = hash_slot (l->pid, new_hash_size);
446 while (new_hash[new_slot])
447 new_slot = hash_next_slot (new_slot, new_hash_size);
449 new_hash[new_slot] = l;
450 new_hash_population++;
/* NOTE(review): hash_population is a size_t but is formatted with %d
   here; %zu would be correct -- confirm against the original.  */
453 if (new_hash_population != hash_population)
454 fprintf (stderr, "ERROR: rehashing changed population from %d to %d\n",
455 hash_population, new_hash_population);
457 /* Free the old table, and drop in the new one. */
460 hash_size = new_hash_size;
464 /* Find an existing hash table entry for LWP. If there is none,
465 create one in state lwp_state_uninitialized. */
/* Find an existing hash table entry for LWP, creating an
   lwp_state_uninitialized entry if none exists.  NOTE(review): the
   return type line, braces, local declarations, lazy-initialization
   test, loop guard, and the trailing return appear to be missing from
   this copy -- recover from version control.  */
467 hash_find (pid_t lwp)
472 /* Do we need to initialize the hash table? */
475 hash_size = minimum_hash_size;
476 hash = make_hash_table (hash_size);
480 for (slot = hash_slot (lwp, hash_size);
482 slot = hash_next_slot (slot, hash_size))
483 if (hash[slot]->pid == lwp)
486 /* There is no entry for this lwp. Create one. */
487 l = malloc (sizeof (*l));
489 l->state = lwp_state_uninitialized;
490 l->next = l->prev = NULL;
496 /* Do we need to resize? */
497 if (hash_size < hash_population * 2)
504 /* Remove the LWP L from the pool. This does not free L itself. */
/* Remove L from the pool without freeing it, then re-hash the rest of
   the probe run (or shrink the table) so later searches still work.
   NOTE(review): the return type, braces, search-loop guard, and the
   actual slot-clearing statement are missing from this copy --
   recover from version control.  */
506 hash_delete (struct lwp *l)
510 for (slot = hash_slot (l->pid, hash_size);
512 slot = hash_next_slot (slot, hash_size))
513 if (hash[slot]->pid == l->pid)
516 /* We shouldn't ever be asked to delete a 'struct lwp' that isn't in
520 /* There should be only one 'struct lwp' with a given PID. */
521 assert (hash[slot] == l);
523 /* Deleting from this kind of hash table is interesting, because of
524 the way we handle collisions.
526 For the sake of discussion, pretend that STRIDE is 1 (the
527 reasoning is basically the same either way, but this has less
530 When we search for an LWP that hashes to slot S, because there
531 may be collisions, the set of slots we'll actually search is the
532 contiguous run of non-empty table entries that starts at S,
533 heading towards higher indices (and possibly wrapping around at
534 the end of the table). When we find an empty table entry, we
537 When we delete an LWP, if we simply set its slot to zero, that
538 could cause us to cut off later searches too early. For example,
539 if three LWP's all hash to slot S, and have been placed in slots
540 S, S+1, and S+2, and we set slot S+1 to zero, then a search for
541 the LWP at S+2 will start at S, and then stop at S+1 without ever
542 seeing the right entry at S+2.
544 Some implementations place a special "deleted" marker in the slot
545 to let searches continue. But then it's hard to ensure that the
546 table doesn't get choked with deleted markers; and should deleted
547 markers count towards the population for resizing purposes? It's
550 So after clearing a slot, we walk the remainder of the contiguous
551 run of entries and re-hash them all. If the hash function is
552 doing a good job distributing entries across the table,
553 contiguous runs should be short. And it had better be good,
554 because this is potentially quadratic.
556 Of course, if we're going to resize the table, that removes all
557 deleted elements, so we needn't bother with any of this. */
562 if (hash_size > minimum_hash_size
563 && hash_size > hash_population * 8)
566 for (slot = hash_next_slot (slot, hash_size);
568 slot = hash_next_slot (slot, hash_size))
570 struct lwp *refugee = hash[slot];
/* Re-insert each displaced entry at its earliest empty slot.  */
573 hash[hash_empty_slot (refugee->pid)] = refugee;
579 /* Queue functions. */
581 /* Insert L at the end of the queue headed by QUEUE. */
583 queue_enqueue (struct lwp *queue, struct lwp *l)
585 assert (! l->next && ! l->prev);
588 l->prev = queue->prev;
594 /* If L is part of some queue, remove it. */
596 queue_delete (struct lwp *l)
598 assert (l->next && l->prev);
600 l->next->prev = l->prev;
601 l->prev->next = l->next;
602 l->next = l->prev = NULL;
606 /* Return non-zero if there is anything in QUEUE, zero otherwise. */
608 queue_non_empty (struct lwp *queue)
610 return queue->next != queue;
614 /* Return the first LWP from QUEUE, but don't remove it. If QUEUE is
615 empty, return NULL. */
617 queue_first (struct lwp *queue)
619 struct lwp *l = queue->next;
629 /* Hashing LWP's, but with error checking and cleanup. */
632 /* Add an entry for LWP to the pool and return it. There should be no
633 existing entry for LWP; if there is, clean it up. The returned
634 LWP's state is always lwp_state_uninitialized; the caller must
635 initialize the LWP before returning. */
/* Add an entry for LWP to the pool and return it, complaining if an
   entry already existed.  NOTE(review): the return type line, braces,
   the queue-removal statement, and the trailing `return l;' appear to
   be missing from this copy -- recover from version control.  */
637 hash_find_new (pid_t lwp)
639 struct lwp *l = hash_find (lwp);
641 if (l->state != lwp_state_uninitialized)
643 fprintf (stderr, "ERROR: new LWP %d already in table\n", (int) lwp);
645 /* Remove ourselves from any queue we might be in. */
/* Reset the stale entry so the caller gets a fresh one.  */
650 l->state = lwp_state_uninitialized;
656 /* Find an entry for an existing LWP, and return it. If we have no
657 existing entry for LWP, print an error message, but return the new,
658 uninitialized entry anyway. */
660 hash_find_known (pid_t lwp)
662 struct lwp *l = hash_find (lwp);
664 if (l->state == lwp_state_uninitialized)
665 fprintf (stderr, "ERROR: unexpected lwp: %d\n", (int) lwp);
675 /* The head of the queue of LWP's with interesting wait statuses.
676 Only the prev and next members are meaningful.
678 Every LWP in one of the lwp_state_*_interesting states should be on
679 this queue. If an LWP's state is lwp_state_dead_interesting, the
680 LWP is not in the hash table any more. */
/* NOTE(review): the initializer presumably follows struct lwp's member
   order (pid = -1, a zero, the prev/next self-links forming an empty
   circular queue, and a final 42); only prev and next are meaningful
   per the comment above -- confirm against the full struct
   definition.  */
681 static struct lwp interesting_queue
682 = { -1, 0, &interesting_queue, &interesting_queue, 42 };
/* Return a human-readable description of wait status STATUS, as a
   pointer to a static buffer (overwritten by the next call; not
   thread-safe).  */
static const char *
wait_status_str (int status)
{
  static char buf[100];

  /* snprintf bounds every write: strsignal strings are of
     unpredictable length, so unbounded sprintf could overrun buf.  */
  if (WIFSTOPPED (status))
    snprintf (buf, sizeof buf, "WIFSTOPPED (s) && WSTOPSIG (s) == %d (%s)",
              WSTOPSIG (status), strsignal (WSTOPSIG (status)));
  else if (WIFEXITED (status))
    snprintf (buf, sizeof buf, "WIFEXITED (s) && WEXITSTATUS (s) == %d",
              WEXITSTATUS (status));
  else if (WIFSIGNALED (status))
    snprintf (buf, sizeof buf, "WIFSIGNALED (s) && WTERMSIG (s) == %d (%s)%s",
              WTERMSIG (status),
              strsignal (WTERMSIG (status)),
              WCOREDUMP (status) ? " && WCOREDUMP(s)" : "");
  else
    snprintf (buf, sizeof buf, "%d (unrecognized status)", status);

  return buf;
}
/* Render waitpid FLAGS as a human-readable string in a static buffer.
   NOTE(review): the return type, braces, the flag_table member names
   and array terminator, the buf reset, the separator handling between
   names, and the trailing return appear to be missing from this copy
   -- recover from version control.  */
709 wait_flags_str (int flags)
711 static const struct {
715 { WNOHANG, "WNOHANG" },
716 { WUNTRACED, "WUNTRACED" },
718 { __WCLONE, "__WCLONE" },
721 { __WALL, "__WALL" },
724 { __WNOTHREAD, "__WNOTHREAD" },
728 static char buf[100];
732 for (i = 0; flag_table[i].flag; i++)
733 if (flags & flag_table[i].flag)
735 strcat (buf, flag_table[i].name);
/* Each recognized flag is cleared so leftovers print as hex below.  */
736 flags &= ~flag_table[i].flag;
742 sprintf (buf + strlen (buf), "0x%x", (unsigned) flags);
752 lwp_state_str (enum lwp_state state)
756 case lwp_state_uninitialized:
757 return "uninitialized";
758 case lwp_state_running:
760 case lwp_state_stopped:
762 case lwp_state_stopped_interesting:
763 return "stopped_interesting";
764 case lwp_state_dead_interesting:
765 return "dead_interesting";
766 case lwp_state_running_stop_pending:
767 return "running_stop_pending";
768 case lwp_state_stopped_stop_pending:
769 return "stopped_stop_pending";
770 case lwp_state_stopped_stop_pending_interesting:
771 return "stopped_stop_pending_interesting";
774 static char buf[100];
775 sprintf (buf, "%d (unrecognized lwp_state)", state);
/* If lwp-pool debugging is enabled, report LWP's transition from state
   OLD to state NEW on stderr.  NOTE(review): the remaining parameter
   lines (lwp, old, new), braces, and the opening fprintf of the
   transition line appear to be missing from this copy -- recover from
   version control.  */
783 debug_report_state_change (struct gdbserv *serv,
788 if (debug_lwp_pool && old != new)
792 lwp_state_str (old), (int) lwp, lwp_state_str (new));
/* When an LWP lands in plain 'stopped', also show where it stopped. */
793 if (new == lwp_state_stopped)
794 fprintf (stderr, " (at %#lx)", debug_get_pc (serv, lwp));
795 fprintf (stderr, "\n");
800 /* Wait for a status from the LWP L (or any LWP, if L is NULL),
801 passing FLAGS to waitpid, and record the resulting wait status in
802 the LWP pool appropriately.
804 If no wait status was available (if FLAGS & WNOHANG), return zero.
805 If we successfully processed some wait status, return 1. If an
806 error occurs, set errno and return -1.
808 If waitpid returns an error, print a message to stderr. */
/* Wait for a status from LWP L (or any LWP if L is NULL) and fold the
   result into the pool's state machine; see the contract comment
   above.  NOTE(review): the return type, braces, local declarations
   (status, new_pid, stopsig), several control-flow lines (the debug
   fprintf opening, error return, WNOHANG return, the switch heads,
   break statements, stepping-flag clear, and the final return) appear
   to be missing from this copy -- recover from version control.  */
810 wait_and_handle (struct gdbserv *serv, struct lwp *l, int flags)
814 enum lwp_state old_state;
816 /* We can only wait for LWP's that are running. */
818 assert (l->state == lwp_state_running
819 || l->state == lwp_state_running_stop_pending);
821 /* This should be the only call to waitpid in this module, to ensure
822 that we always keep each LWP's state up to date. In fact, it
823 should be the only call to waitpid used by any module using the
824 LWP pool code at all. */
825 new_pid = waitpid (l ? l->pid : -1, &status, flags);
830 "lwp_pool: wait_and_handle: waitpid (%d, %s, %s) == %d\n",
832 (new_pid <= 0 ? "(unset)" : wait_status_str (status)),
833 wait_flags_str (flags),
839 /* If we call fprintf, that'll wipe out the value of errno. */
840 int saved_errno = errno;
842 fprintf (stderr, "ERROR: waitpid (%d) failed: %s\n",
843 l ? (int) l->pid : -1,
844 strerror (saved_errno));
851 /* No status, so no LWP has changed state. */
856 if (l->pid != new_pid)
858 fprintf (stderr, "ERROR: waited for %d, but got %d\n",
860 l = hash_find_known (new_pid);
864 l = hash_find_known (new_pid);
866 old_state = l->state;
870 if (WIFEXITED (status) || WIFSIGNALED (status))
872 /* Remove dead LWP's from the hash table, and put them in the
873 interesting queue. */
875 l->state = lwp_state_dead_interesting;
878 queue_enqueue (&interesting_queue, l);
884 assert (WIFSTOPPED (status));
886 stopsig = WSTOPSIG (status);
888 if (stopsig == SIGTRAP)
890 /* No longer stepping once a SIGTRAP is received. */
896 case lwp_state_uninitialized:
897 /* Might as well clean it up. */
898 case lwp_state_running:
899 /* It stopped, but not because of anything we did, so it's
900 interesting even if it was a SIGSTOP. */
901 l->state = lwp_state_stopped_interesting;
902 queue_enqueue (&interesting_queue, l);
905 case lwp_state_running_stop_pending:
907 /* If we were in stopping_queue, we're stopped now. */
911 /* We are expecting a boring SIGSTOP. Is this it? */
912 if (stopsig == SIGSTOP)
913 l->state = lwp_state_stopped;
916 /* Report this status, but remember that we're still
917 expecting the boring SIGSTOP. */
918 l->state = lwp_state_stopped_stop_pending_interesting;
919 queue_enqueue (&interesting_queue, l);
924 /* The assert at top should prevent any other states from
926 fprintf (stderr, "ERROR: called waitpid on LWP %d in bad state %s\n",
927 (int) l->pid, lwp_state_str (l->state));
933 debug_report_state_change (serv, l->pid, old_state, l->state);
939 /* Wait for a pending stop on the running LWP L. Return non-zero if L
940 ends up in an interesting state, or zero if L ends up in
943 Whenever we have an LWP with no interesting status, but with a stop
944 pending, we can always wait on it:
946 - Since SIGCONT can't be blocked, caught, or ignored, the wait will
947 always return immediately. The process won't run amok.
949 - Since the LWP is uninteresting to begin with, we'll end up with
950 at most one interesting wait status to report; no need to queue
951 up multiple statuses per LWP (which we'd rather not implement if
954 So, this function takes an LWP in lwp_state_running_stop_pending,
955 and puts that LWP in either lwp_state_stopped (no stop pending) or
956 some INTERESTING state. It's really just wait_and_handle, with
957 some error checking wrapped around it. */
/* Resolve the pending stop on running LWP L; see the long contract
   comment above.  NOTE(review): the return type, braces, the switch
   head on l->state, the per-case return statements, and the default
   case appear to be missing from this copy -- recover from version
   control.  */
959 check_stop_pending (struct gdbserv *serv, struct lwp *l)
961 assert (l->state == lwp_state_running_stop_pending);
963 wait_and_handle (serv, l, __WALL);
964 /* After one wait, L must be stopped (boring SIGSTOP consumed) or
   have become INTERESTING/dead; anything else is a logic error.  */
967 case lwp_state_stopped:
970 case lwp_state_stopped_stop_pending_interesting:
971 case lwp_state_dead_interesting:
974 case lwp_state_stopped_interesting:
975 /* This state shouldn't happen: since there was a pending stop,
976 a single waitpid on that LWP should have either gotten the
977 SIGSTOP, yielding 'lwp_state_stopped', or something interesting,
978 yielding 'lwp_state_stopped_stop_pending_interesting'. */
981 "ERROR: checking lwp %d for pending stop yielded "
983 (int) l->pid, lwp_state_str (l->state));
/* The pool's public waitpid replacement: pick (or wait for) an
   INTERESTING LWP, report its queued status through STAT_LOC, and
   mark it uninteresting.  NOTE(review): the return type, braces,
   local declarations (l, the saved pid), the WNOHANG/error early
   returns, the queue_delete of the chosen LWP, the free () in the
   dead case, break statements, and the final return of the LWP's pid
   appear to be missing from this copy -- recover from version
   control.  */
994 lwp_pool_waitpid (struct gdbserv *serv, pid_t pid, int *stat_loc, int options)
997 enum lwp_state old_state;
1000 fprintf (stderr, "lwp_pool_waitpid (%d, stat_loc, %s)\n",
1001 (int) pid, wait_flags_str (options));
1003 /* Check that we're not being passed arguments that would be
1004 meaningful for the real waitpid, but that we can't handle. */
1005 assert (pid == -1 || pid > 0);
1006 assert (! (options & ~WNOHANG));
1008 /* Do the wait, and choose an LWP to report on. */
1011 /* Handle wait statuses of any sort until something appears on
1012 the interesting queue. */
1013 while (! queue_non_empty (&interesting_queue))
1015 int result = wait_and_handle (serv, NULL, options | __WALL);
1021 l = queue_first (&interesting_queue);
1025 /* Waiting for a status from a specific pid PID. */
1026 l = hash_find_known (pid);
1028 /* We should only wait for known, running LWP's. */
1029 assert (l->state == lwp_state_running
1030 || l->state == lwp_state_running_stop_pending);
1032 /* Wait until this pid is no longer running. */
1033 while (l->state == lwp_state_running
1034 || l->state == lwp_state_running_stop_pending)
1036 int result = wait_and_handle (serv, l, options | __WALL);
1043 /* Gather info from L early, in case we free it. */
1045 old_state = l->state;
1047 *stat_loc = l->status;
1049 /* The INTERESTING states specifically mean that the LWP has a
1050 status which should be reported to the user, but that hasn't been
1051 yet. Now we're about to report that status, so we need to mark
1052 interesting LWP's as uninteresting. */
1055 case lwp_state_uninitialized:
1056 case lwp_state_running:
1057 case lwp_state_stopped:
1058 case lwp_state_stopped_stop_pending:
1059 case lwp_state_running_stop_pending:
1060 /* These are uninteresting states. The waiting code above
1061 should never have chosen an LWP in one of these states. */
1063 "ERROR: %s: selected uninteresting LWP %d state %s\n",
1064 __func__, l->pid, lwp_state_str (l->state));
1068 case lwp_state_stopped_interesting:
1069 /* Now that we've reported this wait status to the user, the LWP
1070 is not interesting any more. */
1071 l->state = lwp_state_stopped;
1073 debug_report_state_change (serv, l->pid, old_state, l->state);
1076 case lwp_state_dead_interesting:
1077 /* Once we've reported this status, we have washed our hands of
1078 this LWP entirely. */
1083 "lwp_pool: %s: LWP %d state dead_interesting -> freed\n",
1087 case lwp_state_stopped_stop_pending_interesting:
1088 /* We're about to report this LWP's status, making it
1089 uninteresting, but it's still got a stop pending. */
1091 l->state = lwp_state_stopped_stop_pending;
1092 debug_report_state_change (serv, l->pid, old_state, l->state);
1096 fprintf (stderr, "ERROR: lwp %d in bad state: %s\n",
1097 (int) l->pid, lwp_state_str (l->state));
1107 /* Stopping and continuing. */
/* Public entry point: SIGSTOP every LWP that isn't already stopped or
   dead, then drain wait statuses until all pending stops have been
   acknowledged.  NOTE(review): the return type, braces, the loop
   index declaration, the switch heads, break statements, the NULL
   slot guards, the strerror argument of the wait-failure message, and
   the return appear to be missing from this copy -- recover from
   version control.  */
1111 lwp_pool_stop_all (struct gdbserv *serv)
1116 fprintf (stderr, "lwp_pool_stop_all ()\n");
1118 /* The head of the queue of running LWP's that we are stopping.
1119 Only the prev and next members are meaningful. */
1120 struct lwp stopping_queue;
1122 stopping_queue.next = stopping_queue.prev = &stopping_queue;
1124 /* First, put every LWP that's not already STOPPED or DEAD in a STOP
1125 PENDING state, and put them all on stopping_queue. */
1126 for (i = 0; i < hash_size; i++)
1128 struct lwp *l = hash[i];
1132 enum lwp_state old_state = l->state;
1136 /* There should never be 'uninitialized' entries left in
1137 the table. Whoever created them ought to have put them
1138 in some meaningful state before returning. */
1139 case lwp_state_uninitialized:
1140 assert (l->state != lwp_state_uninitialized);
1143 case lwp_state_running:
1144 /* A 'no such process' error here indicates an NPTL thread
1146 kill_lwp (l->pid, SIGSTOP);
1147 l->state = lwp_state_running_stop_pending;
1148 queue_enqueue (&stopping_queue, l);
1151 case lwp_state_stopped:
1152 case lwp_state_stopped_stop_pending:
1153 case lwp_state_stopped_interesting:
1154 case lwp_state_dead_interesting:
1155 case lwp_state_stopped_stop_pending_interesting:
1156 /* Nothing needs to be done here. */
1159 case lwp_state_running_stop_pending:
1160 /* LWPs should never be in this state between calls to
1161 public lwp_pool functions. */
1162 assert (l->state != lwp_state_running_stop_pending);
1166 fprintf (stderr, "ERROR: lwp %d in bad state: %s\n",
1167 (int) l->pid, lwp_state_str (l->state));
1172 debug_report_state_change (serv, l->pid, old_state, l->state);
1176 /* Gather wait results until the stopping queue is empty. */
1177 while (queue_non_empty (&stopping_queue))
1178 if (wait_and_handle (serv, NULL, __WALL) < 0)
1180 fprintf (stderr, "ERROR: lwp_pool_stop_all wait failed: %s",
1185 /* Now all LWPs should be stopped or dead. But let's check. */
1186 for (i = 0; i < hash_size; i++)
1188 struct lwp *l = hash[i];
1192 case lwp_state_uninitialized:
1193 assert (l->state != lwp_state_uninitialized);
1196 case lwp_state_running:
1197 case lwp_state_running_stop_pending:
1199 "ERROR: lwp_pool_stop_all failed: LWP %d still running\n",
1203 case lwp_state_stopped:
1204 case lwp_state_stopped_stop_pending:
1205 case lwp_state_stopped_interesting:
1206 case lwp_state_dead_interesting:
1207 case lwp_state_stopped_stop_pending_interesting:
1208 /* That's all as it should be. */
1212 fprintf (stderr, "ERROR: lwp %d in bad state: %s\n",
1213 (int) l->pid, lwp_state_str (l->state));
/* Resume LWP L, delivering signal SIG, via either singlestep_lwp or
   continue_lwp.  NOTE(review): the condition selecting between the two
   calls is elided from this view (presumably a per-LWP "step" flag) --
   confirm against the full file.  The helper's status is captured in
   'status' (declaration elided).  */
1221 continue_or_step_lwp (struct gdbserv *serv, struct lwp *l, int sig)
1225 status = singlestep_lwp (serv, l->pid, sig);
1227 status = continue_lwp (l->pid, sig);
/* Resume every LWP in the pool that does not still hold an unreported
   ("interesting") wait status.  NOTE(review): this chunk is an elided
   extraction -- braces, 'switch' headers and 'break's are not visible;
   the comments below cover only the statements shown.  */
1234 lwp_pool_continue_all (struct gdbserv *serv)
1239 fprintf (stderr, "lwp_pool_continue_all ()\n");
1241 /* This loop makes every LWP either INTERESTING, or RUNNING. */
1242 for (i = 0; i < hash_size; i++)
1244 struct lwp *l = hash[i];
/* Remember the prior state so the transition can be logged below.  */
1248 enum lwp_state old_state = l->state;
1252 /* There should never be 'uninitialized' entries left in
1253 the table. Whoever created them ought to have put them
1254 in some meaningful state before returning. */
1255 case lwp_state_uninitialized:
1256 assert (l->state != lwp_state_uninitialized);
1259 case lwp_state_running:
1260 /* It's already running, so nothing needs to be done. */
1263 case lwp_state_stopped:
/* Only mark it running if the resume call actually succeeded.  */
1264 if (continue_or_step_lwp (serv, l, 0) == 0)
1265 l->state = lwp_state_running;
1268 case lwp_state_stopped_interesting:
1269 case lwp_state_dead_interesting:
1270 case lwp_state_stopped_stop_pending_interesting:
1271 /* We still have an unreported wait status here, so leave it
1272 alone; we'll report it. */
1275 case lwp_state_running_stop_pending:
1276 /* There shouldn't be any LWPs in this state at this
1277 point. We should be calling check_stop_pending or
1278 wait_and_handle as soon as we create them. */
1279 assert (l->state != lwp_state_running_stop_pending);
1282 case lwp_state_stopped_stop_pending:
1283 /* Continue it, and then wait for the pending stop.
1284 Since SIGSTOP cannot be blocked, caught, or ignored,
1285 the wait will always return immediately; the LWP
1287 if (continue_lwp (l->pid, 0) == 0)
1289 l->state = lwp_state_running_stop_pending;
/* If reaping the pending SIGSTOP produced no new interesting
   status, resume the LWP for real.  */
1290 if (check_stop_pending (serv, l) == 0)
1292 if (continue_or_step_lwp (serv, l, 0) == 0)
1293 l->state = lwp_state_running;
1299 fprintf (stderr, "ERROR: lwp %d in bad state: %s\n",
1300 (int) l->pid, lwp_state_str (l->state));
1305 debug_report_state_change (serv, l->pid, old_state, l->state);
/* Resume the single LWP PID, delivering SIGNAL.  Only legal on LWPs
   whose WIFSTOPPED status was reported via lwp_pool_waitpid and that
   have not been resumed since.  NOTE(review): elided extraction --
   structural lines (braces, 'switch', 'break', return) are not
   visible; comments cover only what is shown.  */
1312 lwp_pool_continue_lwp (struct gdbserv *serv, pid_t pid, int signal)
1314 struct lwp *l = hash_find_known (pid);
1315 enum lwp_state old_state = l->state;
1319 fprintf (stderr, "lwp_pool_continue_lwp (%d, %d)\n",
1324 case lwp_state_uninitialized:
1325 assert (l->state != lwp_state_uninitialized);
1328 /* We should only be continuing LWPs that have reported a
1329 WIFSTOPPED status via lwp_pool_waitpid and have not been
1330 continued or singlestepped since. */
1331 case lwp_state_running:
1332 case lwp_state_stopped_interesting:
1333 case lwp_state_dead_interesting:
1334 case lwp_state_running_stop_pending:
1335 case lwp_state_stopped_stop_pending_interesting:
1336 fprintf (stderr, "ERROR: continuing LWP %d in unwaited state: %s\n",
1337 (int) l->pid, lwp_state_str (l->state));
1340 case lwp_state_stopped:
/* Plain stopped: resume directly, delivering SIGNAL.  */
1341 result = continue_or_step_lwp (serv, l, signal);
1343 l->state = lwp_state_running;
1346 case lwp_state_stopped_stop_pending:
1347 /* Continue it, delivering the given signal, and then wait for
1348 the pending stop. Since SIGSTOP cannot be blocked, caught,
1349 or ignored, the wait will always return immediately; the LWP
1352 We must deliver the signal with the first continue_lwp call;
1353 if check_stop_pending says the LWP has a new interesting
1354 status, then we'll never reach the second continue_lwp, and
1355 we'll lose our chance to deliver the signal. */
1356 if (continue_lwp (l->pid, signal) == 0)
1358 l->state = lwp_state_running_stop_pending;
1359 if (check_stop_pending (serv, l) == 0)
/* SIGNAL was already delivered above, so resume with 0 here.  */
1361 if (continue_or_step_lwp (serv, l, 0) == 0)
1362 l->state = lwp_state_running;
1368 fprintf (stderr, "ERROR: lwp %d in bad state: %s\n",
1369 (int) l->pid, lwp_state_str (l->state));
1374 debug_report_state_change (serv, l->pid, old_state, l->state);
/* Single-step the LWP named LWP, delivering SIGNAL.  Mirrors
   lwp_pool_continue_lwp, but the final resume is singlestep_lwp.
   Only legal on LWPs whose WIFSTOPPED status was reported via
   lwp_pool_waitpid and that have not been resumed since.
   NOTE(review): elided extraction -- structural lines are not
   visible; comments cover only what is shown.  */
1381 lwp_pool_singlestep_lwp (struct gdbserv *serv, pid_t lwp, int signal)
1383 struct lwp *l = hash_find_known (lwp);
1384 enum lwp_state old_state = l->state;
1388 fprintf (stderr, "lwp_pool_singlestep_lwp (%p, %d, %d)\n",
1389 serv, (int) lwp, signal);
1393 case lwp_state_uninitialized:
1394 assert (l->state != lwp_state_uninitialized);
1397 /* We should only be stepping LWPs that have reported a
1398 WIFSTOPPED status via lwp_pool_waitpid and have not been
1399 continued or singlestepped since. */
1400 case lwp_state_running:
1401 case lwp_state_stopped_interesting:
1402 case lwp_state_dead_interesting:
1403 case lwp_state_running_stop_pending:
1404 case lwp_state_stopped_stop_pending_interesting:
1405 fprintf (stderr, "ERROR: stepping LWP %d in unwaited state: %s\n",
1406 (int) l->pid, lwp_state_str (l->state));
1409 case lwp_state_stopped:
/* Plain stopped: step directly, delivering SIGNAL.  */
1410 result = singlestep_lwp (serv, l->pid, signal);
1413 l->state = lwp_state_running;
1418 case lwp_state_stopped_stop_pending:
1419 /* Continue it, delivering the given signal, and then wait for
1420 the pending stop. Since SIGSTOP cannot be blocked, caught,
1421 or ignored, the wait will always return immediately; the LWP
1424 We must deliver the signal with the continue_lwp call; if
1425 check_stop_pending says the LWP has a new interesting status,
1426 then we'll never reach the singlestep_lwp, and we'll lose our
1427 chance to deliver the signal at all. */
1428 if (continue_lwp (l->pid, signal) == 0)
1430 l->state = lwp_state_running_stop_pending;
1431 if (check_stop_pending (serv, l) == 0)
/* SIGNAL was already delivered above, so step with 0 here.  */
1433 if (singlestep_lwp (serv, l->pid, 0) == 0)
1435 l->state = lwp_state_running;
1443 fprintf (stderr, "ERROR: lwp %d in bad state: %s\n",
1444 (int) l->pid, lwp_state_str (l->state));
1449 debug_report_state_change (serv, l->pid, old_state, l->state);
1456 /* Adding new LWP's to the pool. */
/* Register PID -- an LWP the caller already knows to be stopped -- in
   the pool, creating its table entry and marking it lwp_state_stopped.
   NOTE(review): any surrounding braces/return are elided from this
   view.  */
1459 lwp_pool_new_stopped (pid_t pid)
1461 struct lwp *l = hash_find_new (pid);
1464 fprintf (stderr, "lwp_pool_new_stopped (%d)\n", (int) pid);
1466 l->state = lwp_state_stopped;
1469 fprintf (stderr, "lwp_pool: %s: new LWP %d state %s\n",
1470 __func__, l->pid, lwp_state_str (l->state));
1475 lwp_pool_attach (struct gdbserv *serv, pid_t pid)
1477 /* Are we already managing this LWP? */
1478 struct lwp *l = hash_find (pid);
1481 fprintf (stderr, "lwp_pool_attach (%d)\n", (int) pid);
1483 if (l->state == lwp_state_uninitialized)
1485 /* No, we really need to attach to it. */
1486 int status = attach_lwp (pid);
1490 /* Forget about the lwp. */
1496 /* Since we attached to it, we'll get a SIGSTOP for this
1497 eventually. Wait for it now, to put it in either
1498 lwp_state_stopped, or in some interesting state. */
1499 l->state = lwp_state_running_stop_pending;
1502 fprintf (stderr, "lwp_pool: %s: new LWP %d state %s\n",
1503 __func__, l->pid, lwp_state_str (l->state));
1505 check_stop_pending (serv, l);