2 * Copyright (c) 2000, 2001, 2002, 2009, 2011 Red Hat, Inc.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * A copy of the GNU General Public License can be found at
12 * Written by DJ Delorie <dj@redhat.com>
25 #include <cygwin/version.h>
27 static char *prog_name;
29 static struct option longopts[] =
31 {"console-trace", no_argument, NULL, 'c' },
32 {"disable", no_argument, NULL, 'd' },
33 {"enable", no_argument, NULL, 'e' },
34 {"help", no_argument, NULL, 'h' },
35 {"dll", no_argument, NULL, 'l' },
36 {"sub-threads", no_argument, NULL, 's' },
37 {"trace-eip", no_argument, NULL, 't' },
38 {"verbose", no_argument, NULL, 'v' },
39 {"version", no_argument, NULL, 'V' },
43 static char opts[] = "+cdehlstvV";
45 #define KERNEL_ADDR 0x77000000
52 /* from winsup/gmon.h */
54 unsigned long lpc; /* base pc address of sample buffer */
55 unsigned long hpc; /* max pc address of sampled buffer */
56 int ncnt; /* size of sample buffer (plus this header) */
57 int version; /* version number */
58 int profrate; /* profiling clock rate */
59 int spare[3]; /* reserved */
61 #define GMONVERSION 0x00051879
62 #define HISTCOUNTER unsigned short
65 unsigned int base_address;
73 unsigned char real_byte;
76 unsigned low_pc=0, high_pc=0;
77 unsigned last_pc=0, pc, last_sp=0, sp;
78 int total_cycles, count;
80 PROCESS_INFORMATION procinfo;
85 int running = 1, profiling = 1;
86 char dll_name[1024], *dll_ptr, *cp;
88 unsigned opcode_count = 0;
90 int stepping_enabled = 1;
91 int tracing_enabled = 0;
92 int trace_console = 0;
93 int trace_all_threads = 0;
97 #define MAXTHREADS 100
98 DWORD active_thread_ids[MAXTHREADS];
99 HANDLE active_threads[MAXTHREADS];
100 DWORD thread_step_flags[MAXTHREADS];
101 DWORD thread_return_address[MAXTHREADS];
102 int num_active_threads = 0;
103 int suspended_count=0;
106 DllInfo dll_info[MAXDLLS];
110 PendingBreakpoints pending_breakpoints[MAXPENDS];
111 int num_breakpoints=0;
114 add_breakpoint (unsigned int address)
118 static char int3[] = { 0xcc };
119 for (i=0; i<num_breakpoints; i++)
121 if (pending_breakpoints[i].address == address)
123 if (pending_breakpoints[i].address == 0)
128 pending_breakpoints[i].address = address;
129 ReadProcessMemory (hProcess,
131 &(pending_breakpoints[i].real_byte),
134 WriteProcessMemory (hProcess,
136 (LPVOID)int3, 1, &rv);
137 if (i >= num_breakpoints)
138 num_breakpoints = i+1;
142 remove_breakpoint (unsigned int address)
146 for (i=0; i<num_breakpoints; i++)
148 if (pending_breakpoints[i].address == address)
150 pending_breakpoints[i].address = 0;
151 WriteProcessMemory (hProcess,
153 &(pending_breakpoints[i].real_byte),
162 lookup_thread_id (DWORD threadId, int *tix)
166 for (i=0; i<num_active_threads; i++)
167 if (active_thread_ids[i] == threadId)
171 return active_threads[i];
177 set_step_threads (int threadId, int trace)
180 HANDLE thread = lookup_thread_id (threadId, &tix);
182 rv = GetThreadContext (thread, &context);
185 thread_step_flags[tix] = trace;
187 context.EFlags |= 0x100; /* TRAP (single step) flag */
189 context.EFlags &= ~0x100; /* TRAP (single step) flag */
190 SetThreadContext (thread, &context);
198 for (i=0; i<num_active_threads; i++)
200 GetThreadContext (active_threads[i], &context);
201 s = context.EFlags & 0x0100;
202 if (!s && thread_step_flags[i])
204 set_step_threads (active_thread_ids[i], 1);
210 dll_sort (const void *va, const void *vb)
212 DllInfo *a = (DllInfo *)va;
213 DllInfo *b = (DllInfo *)vb;
214 if (a->base_address < b->base_address)
220 addr2dllname (unsigned int addr)
223 for (i=num_dlls-1; i>=0; i--)
225 if (dll_info[i].base_address < addr)
227 return dll_info[i].name;
234 dump_registers (HANDLE thread)
236 context.ContextFlags = CONTEXT_FULL;
237 GetThreadContext (thread, &context);
238 printf ("eax %08lx ebx %08lx ecx %08lx edx %08lx eip\n",
239 context.Eax, context.Ebx, context.Ecx, context.Edx);
240 printf ("esi %08lx edi %08lx ebp %08lx esp %08lx %08lx\n",
241 context.Esi, context.Esi, context.Ebp, context.Esp, context.Eip);
244 typedef struct Edge {
246 unsigned int from_pc;
254 store_call_edge (unsigned int from_pc, unsigned int to_pc)
257 unsigned int h = ((from_pc + to_pc)>>4) & 4095;
258 for (e=edges[h]; e; e=e->next)
259 if (e->from_pc == from_pc && e->to_pc == to_pc)
263 e = (Edge *)malloc (sizeof (Edge));
266 e->from_pc = from_pc;
274 write_call_edges (FILE *f)
278 for (h=0; h<4096; h++)
279 for (e=edges[h]; e; e=e->next)
280 fwrite (&(e->from_pc), 1, 3*sizeof (unsigned int), f);
284 wide_strdup (char *cp)
286 unsigned short *s = (unsigned short *)cp;
289 for (len=0; s[len]; len++);
290 rv = (char *)malloc (len+1);
291 for (len=0; s[len]; len++)
298 run_program (char *cmdline)
305 memset (&startup, 0, sizeof (startup));
306 startup.cb = sizeof (startup);
308 if (!CreateProcess (0, cmdline, 0, 0, 0,
309 CREATE_NEW_PROCESS_GROUP
312 | DEBUG_ONLY_THIS_PROCESS,
313 0, 0, &startup, &procinfo))
315 fprintf (stderr, "Can't create process: error %ld\n", GetLastError ());
319 hProcess = procinfo.hProcess;
321 printf ("procinfo: %08x %08x %08x %08x\n",
322 hProcess, procinfo.hThread, procinfo.dwProcessId, procinfo.dwThreadId);
325 active_threads[0] = procinfo.hThread;
326 active_thread_ids[0] = procinfo.dwThreadId;
327 thread_step_flags[0] = stepping_enabled;
328 num_active_threads = 1;
330 dll_info[0].base_address = 0;
331 dll_info[0].pcount = 0;
332 dll_info[0].scount = 0;
333 dll_info[0].name = cmdline;
336 SetThreadPriority (procinfo.hThread, THREAD_PRIORITY_IDLE);
338 context.ContextFlags = CONTEXT_FULL;
340 ResumeThread (procinfo.hThread);
346 tracefile = fopen ("trace.ssp", "w");
350 perror ("trace.ssp");
360 int contv = DBG_CONTINUE;
362 event.dwDebugEventCode = -1;
363 if (!WaitForDebugEvent (&event, INFINITE))
365 printf ("idle...\n");
368 hThread = lookup_thread_id (event.dwThreadId, &tix);
371 printf ("DE: %x/%d %d %d ",
373 event.dwDebugEventCode, num_active_threads);
374 for (src=0; src<num_active_threads; src++)
376 int sc = SuspendThread (active_threads[src]);
377 int rv = GetThreadContext (active_threads[src], &context);
378 ResumeThread (active_threads[src]);
379 printf (" [%x,%x,%x]",
380 active_threads[src], context.Eip, active_thread_ids[src]);
385 switch (event.dwDebugEventCode)
388 case CREATE_PROCESS_DEBUG_EVENT:
391 case CREATE_THREAD_DEBUG_EVENT:
393 printf ("create thread %08lx at %08x %s\n",
395 (int)event.u.CreateThread.lpStartAddress,
396 addr2dllname ((unsigned int)event.u.CreateThread.lpStartAddress));
398 active_thread_ids[num_active_threads] = event.dwThreadId;
399 active_threads[num_active_threads] = event.u.CreateThread.hThread;
400 thread_return_address[num_active_threads] = 0;
401 num_active_threads++;
403 if (trace_all_threads && stepping_enabled)
405 thread_step_flags[num_active_threads-1] = stepping_enabled;
406 add_breakpoint ((int)event.u.CreateThread.lpStartAddress);
411 case EXIT_THREAD_DEBUG_EVENT:
413 printf ("exit thread %08lx, code=%ld\n",
415 event.u.ExitThread.dwExitCode);
417 for (src=0, dest=0; src<num_active_threads; src++)
418 if (active_thread_ids[src] != event.dwThreadId)
420 active_thread_ids[dest] = active_thread_ids[src];
421 active_threads[dest] = active_threads[src];
424 num_active_threads = dest;
427 case EXCEPTION_DEBUG_EVENT:
428 rv = GetThreadContext (hThread, &context);
429 switch (event.u.Exception.ExceptionRecord.ExceptionCode)
431 case STATUS_BREAKPOINT:
432 if (remove_breakpoint ((int)event.u.Exception.ExceptionRecord.ExceptionAddress))
436 SetThreadContext (hThread, &context);
437 if (ReadProcessMemory (hProcess, (void *)context.Esp, &rv, 4, &rv))
438 thread_return_address[tix] = rv;
440 set_step_threads (event.dwThreadId, stepping_enabled);
441 case STATUS_SINGLE_STEP:
443 pc = (unsigned int)event.u.Exception.ExceptionRecord.ExceptionAddress;
444 sp = (unsigned int)context.Esp;
446 fprintf (tracefile, "%08x %08lx\n", pc, event.dwThreadId);
449 printf ("%d %08x\n", tix, pc);
456 for (i=num_dlls-1; i>=0; i--)
458 if (dll_info[i].base_address < context.Eip)
460 if (hThread == procinfo.hThread)
461 dll_info[i].pcount++;
463 dll_info[i].scount++;
469 if (pc < last_pc || pc > last_pc+10)
474 fprintf (stderr, " %08x %d %d \r",
475 pc, ncalls, opcode_count);
480 store_call_edge (last_pc, pc);
481 if (last_pc < KERNEL_ADDR && pc > KERNEL_ADDR)
485 ReadProcessMemory (hProcess,
490 printf ("call last_pc = %08x pc = %08x rv = %08x\n",
491 last_pc, pc, retaddr);
492 /* experimental - try to skip kernel calls for speed */
493 add_breakpoint (retaddr);
494 set_step_threads (event.dwThreadId, 0);
503 if (pc >= low_pc && pc < high_pc)
504 hits[(pc - low_pc)/2] ++;
509 printf ("exception %ld, ", event.u.Exception.dwFirstChance);
510 printf ("code: %lx flags: %lx\n",
511 event.u.Exception.ExceptionRecord.ExceptionCode,
512 event.u.Exception.ExceptionRecord.ExceptionFlags);
513 if (event.u.Exception.dwFirstChance == 1)
514 dump_registers (hThread);
516 contv = DBG_EXCEPTION_NOT_HANDLED;
523 if (pc == thread_return_address[tix])
525 if (context.EFlags & 0x100)
527 context.EFlags &= ~0x100; /* TRAP (single step) flag */
528 SetThreadContext (hThread, &context);
531 else if (stepping_enabled)
533 if (!(context.EFlags & 0x100))
535 context.EFlags |= 0x100; /* TRAP (single step) flag */
536 SetThreadContext (hThread, &context);
542 case OUTPUT_DEBUG_STRING_EVENT:
543 string = (char *)malloc (event.u.DebugString.nDebugStringLength+1);
544 i = ReadProcessMemory (hProcess,
545 event.u.DebugString.lpDebugStringData,
547 event.u.DebugString.nDebugStringLength,
551 printf ("error reading memory: %ld %ld\n", rv, GetLastError ());
554 printf ("ODS: %x/%d \"%s\"\n",
555 (int)hThread, tix, string);
557 if (strcmp (string, "ssp on") == 0)
559 stepping_enabled = 1;
560 set_step_threads (event.dwThreadId, 1);
563 if (strcmp (string, "ssp off") == 0)
565 stepping_enabled = 0;
566 set_step_threads (event.dwThreadId, 0);
572 case LOAD_DLL_DEBUG_EVENT:
574 printf ("load dll %08x:",
575 (int)event.u.LoadDll.lpBaseOfDll);
577 dll_ptr = (char *)"( u n k n o w n ) \0\0";
578 if (event.u.LoadDll.lpImageName)
580 ReadProcessMemory (hProcess,
581 event.u.LoadDll.lpImageName,
587 ReadProcessMemory (hProcess,
594 for (cp=dll_name; *cp; cp++)
596 if (*cp == '\\' || *cp == '/')
600 *cp = tolower ((unsigned char) *cp);
606 dll_info[num_dlls].base_address
607 = (unsigned int)event.u.LoadDll.lpBaseOfDll;
608 dll_info[num_dlls].pcount = 0;
609 dll_info[num_dlls].scount = 0;
610 dll_info[num_dlls].name = wide_strdup (dll_ptr);
612 printf (" %s\n", dll_info[num_dlls].name);
614 qsort (dll_info, num_dlls, sizeof (DllInfo), dll_sort);
618 case UNLOAD_DLL_DEBUG_EVENT:
620 printf ("unload dll\n");
623 case EXIT_PROCESS_DEBUG_EVENT:
625 printf ("process %08lx %08lx exit %ld\n",
626 event.dwProcessId, event.dwThreadId,
627 event.u.ExitProcess.dwExitCode);
634 ContinueDebugEvent (event.dwProcessId, event.dwThreadId, contv);
638 for (pc=low_pc; pc<high_pc; pc+=2)
640 count += hits[(pc - low_pc)/2];
642 printf ("total cycles: %d, counted cycles: %d\n", total_cycles, count);
650 usage (FILE * stream)
653 "Usage: %s [options] low_pc high_pc command...\n"
655 "Single-step profile COMMAND\n"
657 " -c, --console-trace trace every EIP value to the console. *Lots* slower.\n"
658 " -d, --disable disable single-stepping by default; use\n"
659 " OutputDebugString (\"ssp on\") to enable stepping\n"
660 " -e, --enable enable single-stepping by default; use\n"
661 " OutputDebugString (\"ssp off\") to disable stepping\n"
662 " -h, --help output usage information and exit\n"
663 " -l, --dll enable dll profiling. A chart of relative DLL usage\n"
664 " is produced after the run.\n"
665 " -s, --sub-threads trace sub-threads too. Dangerous if you have\n"
666 " race conditions.\n"
667 " -t, --trace-eip trace every EIP value to a file TRACE.SSP. This\n"
668 " gets big *fast*.\n"
669 " -v, --verbose output verbose messages about debug events.\n"
670 " -V, --version output version information and exit\n"
672 "Example: %s 0x401000 0x403000 hello.exe\n"
674 "", prog_name, prog_name);
675 if (stream == stdout)
677 "SSP - The Single Step Profiler\n"
679 "Original Author: DJ Delorie <dj@redhat.com>\n"
681 "The SSP is a program that uses the Win32 debug API to run a program\n"
682 "one ASM instruction at a time. It records the location of each\n"
683 "instruction used, how many times that instruction is used, and all\n"
684 "function calls. The results are saved in a format that is usable by\n"
685 "the profiling program \"gprof\", although gprof will claim the values\n"
686 "are seconds, they really are instruction counts. More on that later.\n"
688 "Because the SSP was originally designed to profile the cygwin DLL, it\n"
689 "does not automatically select a block of code to report statistics on.\n"
690 "You must specify the range of memory addresses to keep track of\n"
691 "manually, but it's not hard to figure out what to specify. Use the\n"
692 "\"objdump\" program to determine the bounds of the target's \".text\"\n"
693 "section. Let's say we're profiling cygwin1.dll. Make sure you've\n"
694 "built it with debug symbols (else gprof won't run) and run objdump\n"
697 " objdump -h cygwin1.dll\n"
699 "It will print a report like this:\n"
701 "cygwin1.dll: file format pei-i386\n"
704 "Idx Name Size VMA LMA File off Algn\n"
705 " 0 .text 0007ea00 61001000 61001000 00000400 2**2\n"
706 " CONTENTS, ALLOC, LOAD, READONLY, CODE, DATA\n"
707 " 1 .data 00008000 61080000 61080000 0007ee00 2**2\n"
708 " CONTENTS, ALLOC, LOAD, DATA\n"
711 "The only information we're concerned with are the VMA of the .text\n"
712 "section and the VMA of the section after it (sections are usually\n"
713 "contiguous; you can also add the Size to the VMA to get the end\n"
714 "address). In this case, the VMA is 0x61001000 and the ending address\n"
715 "is either 0x61080000 (start of .data method) or 0x0x6107fa00 (VMA+Size\n"
718 "There are two basic ways to use SSP - either profiling a whole\n"
719 "program, or selectively profiling parts of the program.\n"
721 "To profile a whole program, just run ssp without options. By default,\n"
722 "it will step the whole program. Here's a simple example, using the\n"
725 " ssp 0x61001000 0x61080000 hello.exe\n"
727 "This will step the whole program. It will take at least 8 minutes on\n"
728 "a PII/300 (yes, really). When it's done, it will create a file called\n"
729 "\"gmon.out\". You can turn this data file into a readable report with\n"
732 " gprof -b cygwin1.dll\n"
734 "The \"-b\" means 'skip the help pages'. You can omit this until you're\n"
735 "familiar with the report layout. The gprof documentation explains\n"
736 "a lot about this report, but ssp changes a few things. For example,\n"
737 "the first part of the report reports the amount of time spent in each\n"
738 "function, like this:\n"
740 "Each sample counts as 0.01 seconds.\n"
741 " %% cumulative self self total\n"
742 " time seconds seconds calls ms/call ms/call name\n"
743 " 10.02 231.22 72.43 46 1574.57 1574.57 strcspn\n"
744 " 7.95 288.70 57.48 130 442.15 442.15 strncasematch\n"
746 "The \"seconds\" columns are really CPU opcodes, 1/100 second per opcode.\n"
747 "So, \"231.22\" above means 23,122 opcodes. The ms/call values are 10x\n"
748 "too big; 1574.57 means 157.457 opcodes per call. Similar adjustments\n"
749 "need to be made for the \"self\" and \"children\" columns in the second\n"
750 "part of the report.\n"
752 "OK, so now we've got a huge report that took a long time to generate,\n"
753 "and we've identified a spot we want to work on optimizing. Let's say\n"
754 "it's the time() function. We can use SSP to selectively profile this\n"
755 "function by using OutputDebugString() to control SSP from within the\n"
756 "program. Here's a sample program:\n"
758 " #include <windows.h>\n"
762 " OutputDebugString(\"ssp on\");\n"
764 " OutputDebugString(\"ssp off\");\n"
767 "Then, add the \"-d\" option to ssp to default to *disabling* profiling.\n"
768 "The program will run at full speed until the first OutputDebugString,\n"
769 "then step until the second.\n"
771 " ssp -d 0x61001000 0x61080000 hello.exe\n"
773 "You can then use gprof (as usual) to see the performance profile for\n"
774 "just that portion of the program's execution.\n"
776 "There are many options to ssp. Since step-profiling makes your\n"
777 "program run about 1,000 times slower than normal, it's best to\n"
778 "understand all the options so that you can narrow down the parts\n"
779 "of your program you need to single-step.\n"
781 "\"-v\" - verbose. This prints messages about threads starting and\n"
782 "stopping, OutputDebugString calls, DLLs loading, etc.\n"
784 "\"-t\" and \"-c\" - tracing. With -t, *every* step's address is written\n"
785 "to the file \"trace.ssp\". This can be used to help debug functions,\n"
786 "since it can trace multiple threads. Clever use of scripts can match\n"
787 "addresses with disassembled opcodes if needed. Warning: creates\n"
788 "*huge* files, very quickly. \"-c\" prints each address to the console,\n"
789 "useful for debugging key chunks of assembler.\n"
790 "Use \"addr2line -C -f -s -e foo.exe < trace.ssp > lines.ssp\" and then\n"
791 "\"perl cvttrace\" to convert to symbolic traces.\n"
793 "\"-s\" - subthreads. Usually, you only need to trace the main thread,\n"
794 "but sometimes you need to trace all threads, so this enables that.\n"
795 "It's also needed when you want to profile a function that only a\n"
796 "subthread calls. However, using OutputDebugString automatically\n"
797 "enables profiling on the thread that called it, not the main thread.\n"
799 "\"-l\" - dll profiling. Generates a pretty table of how much time was\n"
800 "spent in each dll the program used. No sense optimizing a function in\n"
801 "your program if most of the time is spent in the DLL.\n"
803 "Typically use the -v, -s, and -l options:\n"
805 " ssp -v -s -l -d 0x61001000 0x61080000 hello.exe\n"
807 if (stream == stderr)
808 fprintf (stream, "Try '%s --help' for more information.\n", prog_name);
809 exit (stream == stderr ? 1 : 0);
815 printf ("ssp (cygwin) %d.%d.%d\n"
816 "Single-Step Profiler\n"
817 "Copyright (C) 2000 - %s Red Hat, Inc.\n"
818 "This is free software; see the source for copying conditions. There is NO\n"
819 "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n",
820 CYGWIN_VERSION_DLL_MAJOR / 1000,
821 CYGWIN_VERSION_DLL_MAJOR % 1000,
822 CYGWIN_VERSION_DLL_MINOR,
823 strrchr (__DATE__, ' ') + 1);
827 main (int argc, char **argv)
830 int total_pcount = 0, total_scount = 0;
835 prog_name = program_invocation_short_name;
837 while ((c = getopt_long (argc, argv, opts, longopts, NULL)) != EOF)
841 printf ("tracing *all* $eip to the console\n");
845 printf ("stepping disabled; enable via OutputDebugString (\"ssp on\")\n");
846 stepping_enabled = 0;
849 printf ("stepping enabled; disable via OutputDebugString (\"ssp off\")\n");
850 stepping_enabled = 1;
856 printf ("profiling dll usage\n");
860 printf ("tracing all sub-threads too, not just the main one\n");
861 trace_all_threads = 1;
864 printf ("tracing all $eip to trace.ssp\n");
868 printf ("verbose messages enabled\n");
875 fprintf (stderr, "Try `%s --help' for more information.\n", prog_name);
879 if ( (argc - optind) < 3 )
881 sscanf (argv[optind++], "%i", &low_pc);
882 sscanf (argv[optind++], "%i", &high_pc);
884 if (low_pc > high_pc-8)
886 fprintf (stderr, "Hey, low_pc must be lower than high_pc\n");
890 hits = (HISTCOUNTER *)malloc (high_pc-low_pc+4);
891 memset (hits, 0, high_pc-low_pc+4);
893 fprintf (stderr, "prun: [%08x,%08x] Running '%s'\n",
894 low_pc, high_pc, argv[optind]);
896 run_program (argv[optind]);
900 hdr.ncnt = high_pc-low_pc + sizeof (hdr);
901 hdr.version = GMONVERSION;
904 gmon = fopen ("gmon.out", "wb");
905 fwrite (&hdr, 1, sizeof (hdr), gmon);
906 fwrite (hits, 1, high_pc-low_pc, gmon);
907 write_call_edges (gmon);
912 /* 1234567 123% 1234567 123% 12345678 xxxxxxxxxxx */
913 printf (" Main-Thread Other-Thread BaseAddr DLL Name\n");
917 for (i=0; i<num_dlls; i++)
919 total_pcount += dll_info[i].pcount;
920 total_scount += dll_info[i].scount;
923 if (total_pcount == 0) total_pcount++;
924 if (total_scount == 0) total_scount++;
926 for (i=0; i<num_dlls; i++)
927 if (dll_info[i].pcount || dll_info[i].scount)
929 printf ("%7d %3d%% %7d %3d%% %08x %s\n",
931 (dll_info[i].pcount*100)/opcode_count,
933 (dll_info[i].scount*100)/opcode_count,
934 dll_info[i].base_address,