2 * Copyright (c) 2000, 2001, 2002, 2009 Red Hat, Inc.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * A copy of the GNU General Public License can be found at
12 * Written by DJ Delorie <dj@redhat.com>
25 static const char version[] = "$Revision$";
26 static char *prog_name;
28 static struct option longopts[] =
30 {"console-trace", no_argument, NULL, 'c' },
31 {"disable", no_argument, NULL, 'd' },
32 {"enable", no_argument, NULL, 'e' },
33 {"help", no_argument, NULL, 'h' },
34 {"dll", no_argument, NULL, 'l' },
35 {"sub-threads", no_argument, NULL, 's' },
36 {"trace-eip", no_argument, NULL, 't' },
37 {"verbose", no_argument, NULL, 'v' },
38 {"version", no_argument, NULL, 'V' },
42 static char opts[] = "+cdehlstvV";
44 #define KERNEL_ADDR 0x77000000
51 /* from winsup/gmon.h */
53 unsigned long lpc; /* base pc address of sample buffer */
54 unsigned long hpc; /* max pc address of sampled buffer */
55 int ncnt; /* size of sample buffer (plus this header) */
56 int version; /* version number */
57 int profrate; /* profiling clock rate */
58 int spare[3]; /* reserved */
60 #define GMONVERSION 0x00051879
61 #define HISTCOUNTER unsigned short
64 unsigned int base_address;
72 unsigned char real_byte;
75 unsigned low_pc=0, high_pc=0;
76 unsigned last_pc=0, pc, last_sp=0, sp;
77 int total_cycles, count;
79 PROCESS_INFORMATION procinfo;
84 int running = 1, profiling = 1;
85 char dll_name[1024], *dll_ptr, *cp;
87 unsigned opcode_count = 0;
89 int stepping_enabled = 1;
90 int tracing_enabled = 0;
91 int trace_console = 0;
92 int trace_all_threads = 0;
96 #define MAXTHREADS 100
97 DWORD active_thread_ids[MAXTHREADS];
98 HANDLE active_threads[MAXTHREADS];
99 DWORD thread_step_flags[MAXTHREADS];
100 DWORD thread_return_address[MAXTHREADS];
101 int num_active_threads = 0;
102 int suspended_count=0;
105 DllInfo dll_info[MAXDLLS];
109 PendingBreakpoints pending_breakpoints[MAXPENDS];
110 int num_breakpoints=0;
113 add_breakpoint (unsigned int address)
117 static char int3[] = { 0xcc };
118 for (i=0; i<num_breakpoints; i++)
120 if (pending_breakpoints[i].address == address)
122 if (pending_breakpoints[i].address == 0)
127 pending_breakpoints[i].address = address;
128 ReadProcessMemory (hProcess,
130 &(pending_breakpoints[i].real_byte),
133 WriteProcessMemory (hProcess,
135 (LPVOID)int3, 1, &rv);
136 if (i >= num_breakpoints)
137 num_breakpoints = i+1;
141 remove_breakpoint (unsigned int address)
145 for (i=0; i<num_breakpoints; i++)
147 if (pending_breakpoints[i].address == address)
149 pending_breakpoints[i].address = 0;
150 WriteProcessMemory (hProcess,
152 &(pending_breakpoints[i].real_byte),
161 lookup_thread_id (DWORD threadId, int *tix)
165 for (i=0; i<num_active_threads; i++)
166 if (active_thread_ids[i] == threadId)
170 return active_threads[i];
176 set_step_threads (int threadId, int trace)
179 HANDLE thread = lookup_thread_id (threadId, &tix);
181 rv = GetThreadContext (thread, &context);
184 thread_step_flags[tix] = trace;
186 context.EFlags |= 0x100; /* TRAP (single step) flag */
188 context.EFlags &= ~0x100; /* TRAP (single step) flag */
189 SetThreadContext (thread, &context);
197 for (i=0; i<num_active_threads; i++)
199 GetThreadContext (active_threads[i], &context);
200 s = context.EFlags & 0x0100;
201 if (!s && thread_step_flags[i])
203 set_step_threads (active_thread_ids[i], 1);
209 dll_sort (const void *va, const void *vb)
211 DllInfo *a = (DllInfo *)va;
212 DllInfo *b = (DllInfo *)vb;
213 if (a->base_address < b->base_address)
219 addr2dllname (unsigned int addr)
222 for (i=num_dlls-1; i>=0; i--)
224 if (dll_info[i].base_address < addr)
226 return dll_info[i].name;
233 dump_registers (HANDLE thread)
235 context.ContextFlags = CONTEXT_FULL;
236 GetThreadContext (thread, &context);
237 printf ("eax %08lx ebx %08lx ecx %08lx edx %08lx eip\n",
238 context.Eax, context.Ebx, context.Ecx, context.Edx);
239 printf ("esi %08lx edi %08lx ebp %08lx esp %08lx %08lx\n",
240 context.Esi, context.Esi, context.Ebp, context.Esp, context.Eip);
243 typedef struct Edge {
245 unsigned int from_pc;
253 store_call_edge (unsigned int from_pc, unsigned int to_pc)
256 unsigned int h = ((from_pc + to_pc)>>4) & 4095;
257 for (e=edges[h]; e; e=e->next)
258 if (e->from_pc == from_pc && e->to_pc == to_pc)
262 e = (Edge *)malloc (sizeof (Edge));
265 e->from_pc = from_pc;
273 write_call_edges (FILE *f)
277 for (h=0; h<4096; h++)
278 for (e=edges[h]; e; e=e->next)
279 fwrite (&(e->from_pc), 1, 3*sizeof (unsigned int), f);
283 wide_strdup (char *cp)
285 unsigned short *s = (unsigned short *)cp;
288 for (len=0; s[len]; len++);
289 rv = (char *)malloc (len+1);
290 for (len=0; s[len]; len++)
297 run_program (char *cmdline)
304 memset (&startup, 0, sizeof (startup));
305 startup.cb = sizeof (startup);
307 if (!CreateProcess (0, cmdline, 0, 0, 0,
308 CREATE_NEW_PROCESS_GROUP
311 | DEBUG_ONLY_THIS_PROCESS,
312 0, 0, &startup, &procinfo))
314 fprintf (stderr, "Can't create process: error %ld\n", GetLastError ());
318 hProcess = procinfo.hProcess;
320 printf ("procinfo: %08x %08x %08x %08x\n",
321 hProcess, procinfo.hThread, procinfo.dwProcessId, procinfo.dwThreadId);
324 active_threads[0] = procinfo.hThread;
325 active_thread_ids[0] = procinfo.dwThreadId;
326 thread_step_flags[0] = stepping_enabled;
327 num_active_threads = 1;
329 dll_info[0].base_address = 0;
330 dll_info[0].pcount = 0;
331 dll_info[0].scount = 0;
332 dll_info[0].name = cmdline;
335 SetThreadPriority (procinfo.hThread, THREAD_PRIORITY_IDLE);
337 context.ContextFlags = CONTEXT_FULL;
339 ResumeThread (procinfo.hThread);
345 tracefile = fopen ("trace.ssp", "w");
349 perror ("trace.ssp");
359 int contv = DBG_CONTINUE;
361 event.dwDebugEventCode = -1;
362 if (!WaitForDebugEvent (&event, INFINITE))
364 printf ("idle...\n");
367 hThread = lookup_thread_id (event.dwThreadId, &tix);
370 printf ("DE: %x/%d %d %d ",
372 event.dwDebugEventCode, num_active_threads);
373 for (src=0; src<num_active_threads; src++)
375 int sc = SuspendThread (active_threads[src]);
376 int rv = GetThreadContext (active_threads[src], &context);
377 ResumeThread (active_threads[src]);
378 printf (" [%x,%x,%x]",
379 active_threads[src], context.Eip, active_thread_ids[src]);
384 switch (event.dwDebugEventCode)
387 case CREATE_PROCESS_DEBUG_EVENT:
390 case CREATE_THREAD_DEBUG_EVENT:
392 printf ("create thread %08lx at %08x %s\n",
394 (int)event.u.CreateThread.lpStartAddress,
395 addr2dllname ((unsigned int)event.u.CreateThread.lpStartAddress));
397 active_thread_ids[num_active_threads] = event.dwThreadId;
398 active_threads[num_active_threads] = event.u.CreateThread.hThread;
399 thread_return_address[num_active_threads] = 0;
400 num_active_threads++;
402 if (trace_all_threads && stepping_enabled)
404 thread_step_flags[num_active_threads-1] = stepping_enabled;
405 add_breakpoint ((int)event.u.CreateThread.lpStartAddress);
410 case EXIT_THREAD_DEBUG_EVENT:
412 printf ("exit thread %08lx, code=%ld\n",
414 event.u.ExitThread.dwExitCode);
416 for (src=0, dest=0; src<num_active_threads; src++)
417 if (active_thread_ids[src] != event.dwThreadId)
419 active_thread_ids[dest] = active_thread_ids[src];
420 active_threads[dest] = active_threads[src];
423 num_active_threads = dest;
426 case EXCEPTION_DEBUG_EVENT:
427 rv = GetThreadContext (hThread, &context);
428 switch (event.u.Exception.ExceptionRecord.ExceptionCode)
430 case STATUS_BREAKPOINT:
431 if (remove_breakpoint ((int)event.u.Exception.ExceptionRecord.ExceptionAddress))
435 SetThreadContext (hThread, &context);
436 if (ReadProcessMemory (hProcess, (void *)context.Esp, &rv, 4, &rv))
437 thread_return_address[tix] = rv;
439 set_step_threads (event.dwThreadId, stepping_enabled);
440 case STATUS_SINGLE_STEP:
442 pc = (unsigned int)event.u.Exception.ExceptionRecord.ExceptionAddress;
443 sp = (unsigned int)context.Esp;
445 fprintf (tracefile, "%08x %08lx\n", pc, event.dwThreadId);
448 printf ("%d %08x\n", tix, pc);
455 for (i=num_dlls-1; i>=0; i--)
457 if (dll_info[i].base_address < context.Eip)
459 if (hThread == procinfo.hThread)
460 dll_info[i].pcount++;
462 dll_info[i].scount++;
468 if (pc < last_pc || pc > last_pc+10)
473 fprintf (stderr, " %08x %d %d \r",
474 pc, ncalls, opcode_count);
479 store_call_edge (last_pc, pc);
480 if (last_pc < KERNEL_ADDR && pc > KERNEL_ADDR)
484 ReadProcessMemory (hProcess,
489 printf ("call last_pc = %08x pc = %08x rv = %08x\n",
490 last_pc, pc, retaddr);
491 /* experimental - try to skip kernel calls for speed */
492 add_breakpoint (retaddr);
493 set_step_threads (event.dwThreadId, 0);
502 if (pc >= low_pc && pc < high_pc)
503 hits[(pc - low_pc)/2] ++;
508 printf ("exception %ld, ", event.u.Exception.dwFirstChance);
509 printf ("code: %lx flags: %lx\n",
510 event.u.Exception.ExceptionRecord.ExceptionCode,
511 event.u.Exception.ExceptionRecord.ExceptionFlags);
512 if (event.u.Exception.dwFirstChance == 1)
513 dump_registers (hThread);
515 contv = DBG_EXCEPTION_NOT_HANDLED;
522 if (pc == thread_return_address[tix])
524 if (context.EFlags & 0x100)
526 context.EFlags &= ~0x100; /* TRAP (single step) flag */
527 SetThreadContext (hThread, &context);
530 else if (stepping_enabled)
532 if (!(context.EFlags & 0x100))
534 context.EFlags |= 0x100; /* TRAP (single step) flag */
535 SetThreadContext (hThread, &context);
541 case OUTPUT_DEBUG_STRING_EVENT:
542 string = (char *)malloc (event.u.DebugString.nDebugStringLength+1);
543 i = ReadProcessMemory (hProcess,
544 event.u.DebugString.lpDebugStringData,
546 event.u.DebugString.nDebugStringLength,
550 printf ("error reading memory: %ld %ld\n", rv, GetLastError ());
553 printf ("ODS: %x/%d \"%s\"\n",
554 (int)hThread, tix, string);
556 if (strcmp (string, "ssp on") == 0)
558 stepping_enabled = 1;
559 set_step_threads (event.dwThreadId, 1);
562 if (strcmp (string, "ssp off") == 0)
564 stepping_enabled = 0;
565 set_step_threads (event.dwThreadId, 0);
571 case LOAD_DLL_DEBUG_EVENT:
573 printf ("load dll %08x:",
574 (int)event.u.LoadDll.lpBaseOfDll);
576 dll_ptr = (char *)"( u n k n o w n ) \0\0";
577 if (event.u.LoadDll.lpImageName)
579 ReadProcessMemory (hProcess,
580 event.u.LoadDll.lpImageName,
586 ReadProcessMemory (hProcess,
593 for (cp=dll_name; *cp; cp++)
595 if (*cp == '\\' || *cp == '/')
599 *cp = tolower ((unsigned char) *cp);
605 dll_info[num_dlls].base_address
606 = (unsigned int)event.u.LoadDll.lpBaseOfDll;
607 dll_info[num_dlls].pcount = 0;
608 dll_info[num_dlls].scount = 0;
609 dll_info[num_dlls].name = wide_strdup (dll_ptr);
611 printf (" %s\n", dll_info[num_dlls].name);
613 qsort (dll_info, num_dlls, sizeof (DllInfo), dll_sort);
617 case UNLOAD_DLL_DEBUG_EVENT:
619 printf ("unload dll\n");
622 case EXIT_PROCESS_DEBUG_EVENT:
624 printf ("process %08lx %08lx exit %ld\n",
625 event.dwProcessId, event.dwThreadId,
626 event.u.ExitProcess.dwExitCode);
633 ContinueDebugEvent (event.dwProcessId, event.dwThreadId, contv);
637 for (pc=low_pc; pc<high_pc; pc+=2)
639 count += hits[(pc - low_pc)/2];
641 printf ("total cycles: %d, counted cycles: %d\n", total_cycles, count);
649 usage (FILE * stream)
652 "Usage: %s [options] low_pc high_pc command...\n"
653 "Single-step profile COMMAND\n"
655 " -c, --console-trace trace every EIP value to the console. *Lots* slower.\n"
656 " -d, --disable disable single-stepping by default; use\n"
657 " OutputDebugString (\"ssp on\") to enable stepping\n"
658 " -e, --enable enable single-stepping by default; use\n"
659 " OutputDebugString (\"ssp off\") to disable stepping\n"
660 " -h, --help output usage information and exit\n"
661 " -l, --dll enable dll profiling. A chart of relative DLL usage\n"
662 " is produced after the run.\n"
663 " -s, --sub-threads trace sub-threads too. Dangerous if you have\n"
664 " race conditions.\n"
665 " -t, --trace-eip trace every EIP value to a file TRACE.SSP. This\n"
666 " gets big *fast*.\n"
667 " -v, --verbose output verbose messages about debug events.\n"
668 " -V, --version output version information and exit\n"
670 "Example: %s 0x401000 0x403000 hello.exe\n"
672 "", prog_name, prog_name);
673 if (stream == stdout)
675 "SSP - The Single Step Profiler\n"
677 "Original Author: DJ Delorie <dj@redhat.com>\n"
679 "The SSP is a program that uses the Win32 debug API to run a program\n"
680 "one ASM instruction at a time. It records the location of each\n"
681 "instruction used, how many times that instruction is used, and all\n"
682 "function calls. The results are saved in a format that is usable by\n"
683 "the profiling program \"gprof\", although gprof will claim the values\n"
684 "are seconds, they really are instruction counts. More on that later.\n"
686 "Because the SSP was originally designed to profile the cygwin DLL, it\n"
687 "does not automatically select a block of code to report statistics on.\n"
688 "You must specify the range of memory addresses to keep track of\n"
689 "manually, but it's not hard to figure out what to specify. Use the\n"
690 "\"objdump\" program to determine the bounds of the target's \".text\"\n"
691 "section. Let's say we're profiling cygwin1.dll. Make sure you've\n"
692 "built it with debug symbols (else gprof won't run) and run objdump\n"
695 " objdump -h cygwin1.dll\n"
697 "It will print a report like this:\n"
699 "cygwin1.dll: file format pei-i386\n"
702 "Idx Name Size VMA LMA File off Algn\n"
703 " 0 .text 0007ea00 61001000 61001000 00000400 2**2\n"
704 " CONTENTS, ALLOC, LOAD, READONLY, CODE, DATA\n"
705 " 1 .data 00008000 61080000 61080000 0007ee00 2**2\n"
706 " CONTENTS, ALLOC, LOAD, DATA\n"
709 "The only information we're concerned with are the VMA of the .text\n"
710 "section and the VMA of the section after it (sections are usually\n"
711 "contiguous; you can also add the Size to the VMA to get the end\n"
712 "address). In this case, the VMA is 0x61001000 and the ending address\n"
713 "is either 0x61080000 (start of .data method) or 0x0x6107fa00 (VMA+Size\n"
716 "There are two basic ways to use SSP - either profiling a whole\n"
717 "program, or selectively profiling parts of the program.\n"
719 "To profile a whole program, just run ssp without options. By default,\n"
720 "it will step the whole program. Here's a simple example, using the\n"
723 " ssp 0x61001000 0x61080000 hello.exe\n"
725 "This will step the whole program. It will take at least 8 minutes on\n"
726 "a PII/300 (yes, really). When it's done, it will create a file called\n"
727 "\"gmon.out\". You can turn this data file into a readable report with\n"
730 " gprof -b cygwin1.dll\n"
732 "The \"-b\" means 'skip the help pages'. You can omit this until you're\n"
733 "familiar with the report layout. The gprof documentation explains\n"
734 "a lot about this report, but ssp changes a few things. For example,\n"
735 "the first part of the report reports the amount of time spent in each\n"
736 "function, like this:\n"
738 "Each sample counts as 0.01 seconds.\n"
739 " %% cumulative self self total\n"
740 " time seconds seconds calls ms/call ms/call name\n"
741 " 10.02 231.22 72.43 46 1574.57 1574.57 strcspn\n"
742 " 7.95 288.70 57.48 130 442.15 442.15 strncasematch\n"
744 "The \"seconds\" columns are really CPU opcodes, 1/100 second per opcode.\n"
745 "So, \"231.22\" above means 23,122 opcodes. The ms/call values are 10x\n"
746 "too big; 1574.57 means 157.457 opcodes per call. Similar adjustments\n"
747 "need to be made for the \"self\" and \"children\" columns in the second\n"
748 "part of the report.\n"
750 "OK, so now we've got a huge report that took a long time to generate,\n"
751 "and we've identified a spot we want to work on optimizing. Let's say\n"
752 "it's the time() function. We can use SSP to selectively profile this\n"
753 "function by using OutputDebugString() to control SSP from within the\n"
754 "program. Here's a sample program:\n"
756 " #include <windows.h>\n"
760 " OutputDebugString(\"ssp on\");\n"
762 " OutputDebugString(\"ssp off\");\n"
765 "Then, add the \"-d\" option to ssp to default to *disabling* profiling.\n"
766 "The program will run at full speed until the first OutputDebugString,\n"
767 "then step until the second.\n"
769 " ssp -d 0x61001000 0x61080000 hello.exe\n"
771 "You can then use gprof (as usual) to see the performance profile for\n"
772 "just that portion of the program's execution.\n"
774 "There are many options to ssp. Since step-profiling makes your\n"
775 "program run about 1,000 times slower than normal, it's best to\n"
776 "understand all the options so that you can narrow down the parts\n"
777 "of your program you need to single-step.\n"
779 "\"-v\" - verbose. This prints messages about threads starting and\n"
780 "stopping, OutputDebugString calls, DLLs loading, etc.\n"
782 "\"-t\" and \"-c\" - tracing. With -t, *every* step's address is written\n"
783 "to the file \"trace.ssp\". This can be used to help debug functions,\n"
784 "since it can trace multiple threads. Clever use of scripts can match\n"
785 "addresses with disassembled opcodes if needed. Warning: creates\n"
786 "*huge* files, very quickly. \"-c\" prints each address to the console,\n"
787 "useful for debugging key chunks of assembler.\n"
788 "Use \"addr2line -C -f -s -e foo.exe < trace.ssp > lines.ssp\" and then\n"
789 "\"perl cvttrace\" to convert to symbolic traces.\n"
791 "\"-s\" - subthreads. Usually, you only need to trace the main thread,\n"
792 "but sometimes you need to trace all threads, so this enables that.\n"
793 "It's also needed when you want to profile a function that only a\n"
794 "subthread calls. However, using OutputDebugString automatically\n"
795 "enables profiling on the thread that called it, not the main thread.\n"
797 "\"-l\" - dll profiling. Generates a pretty table of how much time was\n"
798 "spent in each dll the program used. No sense optimizing a function in\n"
799 "your program if most of the time is spent in the DLL.\n"
801 "I usually use the -v, -s, and -l options:\n"
803 " ssp -v -s -l -d 0x61001000 0x61080000 hello.exe\n"
805 if (stream == stderr)
806 fprintf (stream, "Try '%s --help' for more information.\n", prog_name);
807 exit (stream == stderr ? 1 : 0);
813 const char *v = strchr (version, ':');
823 len = strchr (v, ' ') - v;
827 Single-Step Profiler\n\
828 Copyright 2000, 2001, 2002 Red Hat, Inc.\n\
830 ", prog_name, len, v, __DATE__);
834 main (int argc, char **argv)
837 int total_pcount = 0, total_scount = 0;
842 prog_name = strrchr (argv[0], '/');
843 if (prog_name == NULL)
844 prog_name = strrchr (argv[0], '\\');
845 if (prog_name == NULL)
850 while ((c = getopt_long (argc, argv, opts, longopts, NULL)) != EOF)
854 printf ("tracing *all* $eip to the console\n");
858 printf ("stepping disabled; enable via OutputDebugString (\"ssp on\")\n");
859 stepping_enabled = 0;
862 printf ("stepping enabled; disable via OutputDebugString (\"ssp off\")\n");
863 stepping_enabled = 1;
869 printf ("profiling dll usage\n");
873 printf ("tracing all sub-threads too, not just the main one\n");
874 trace_all_threads = 1;
877 printf ("tracing all $eip to trace.ssp\n");
881 printf ("verbose messages enabled\n");
891 if ( (argc - optind) < 3 )
893 sscanf (argv[optind++], "%i", &low_pc);
894 sscanf (argv[optind++], "%i", &high_pc);
896 if (low_pc > high_pc-8)
898 fprintf (stderr, "Hey, low_pc must be lower than high_pc\n");
902 hits = (HISTCOUNTER *)malloc (high_pc-low_pc+4);
903 memset (hits, 0, high_pc-low_pc+4);
905 fprintf (stderr, "prun: [%08x,%08x] Running '%s'\n",
906 low_pc, high_pc, argv[optind]);
908 run_program (argv[optind]);
912 hdr.ncnt = high_pc-low_pc + sizeof (hdr);
913 hdr.version = GMONVERSION;
916 gmon = fopen ("gmon.out", "wb");
917 fwrite (&hdr, 1, sizeof (hdr), gmon);
918 fwrite (hits, 1, high_pc-low_pc, gmon);
919 write_call_edges (gmon);
924 /* 1234567 123% 1234567 123% 12345678 xxxxxxxxxxx */
925 printf (" Main-Thread Other-Thread BaseAddr DLL Name\n");
929 for (i=0; i<num_dlls; i++)
931 total_pcount += dll_info[i].pcount;
932 total_scount += dll_info[i].scount;
935 if (total_pcount == 0) total_pcount++;
936 if (total_scount == 0) total_scount++;
938 for (i=0; i<num_dlls; i++)
939 if (dll_info[i].pcount || dll_info[i].scount)
941 printf ("%7d %3d%% %7d %3d%% %08x %s\n",
943 (dll_info[i].pcount*100)/opcode_count,
945 (dll_info[i].scount*100)/opcode_count,
946 dll_info[i].base_address,