perf record: Add --num-thread-synthesize option

author Stephane Eranian <eranian@google.com>

Wed, 22 Apr 2020 15:50:38 +0000 (08:50 -0700)

committer Arnaldo Carvalho de Melo <acme@redhat.com>

Thu, 23 Apr 2020 14:10:41 +0000 (11:10 -0300)
author Stephane Eranian <eranian@google.com>
Wed, 22 Apr 2020 15:50:38 +0000 (08:50 -0700)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Thu, 23 Apr 2020 14:10:41 +0000 (11:10 -0300)
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt

index b3f3b3f..6e8b464 100644 (file)
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -596,6 +596,10 @@ Make a copy of /proc/kcore and place it into a directory with the perf data file
  Limit the sample data max size, <size> is expected to be a number with
  appended unit character - B/K/M/G
  
+--num-thread-synthesize::
+       The number of threads to run when synthesizing events for existing processes.
+       By default, the number of threads equals 1.
+
  SEE ALSO
  --------
  linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c

index 1ab349a..2e8011f 100644 (file)
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -43,6 +43,7 @@
  #include "util/time-utils.h"
  #include "util/units.h"
  #include "util/bpf-event.h"
+#include "util/util.h"
  #include "asm/bug.h"
  #include "perf.h"
  
@@ -50,6 +51,7 @@
  #include <inttypes.h>
  #include <locale.h>
  #include <poll.h>
+#include <pthread.h>
  #include <unistd.h>
  #include <sched.h>
  #include <signal.h>
@@ -503,6 +505,20 @@ static int process_synthesized_event(struct perf_tool *tool,
         return record__write(rec, NULL, event, event->header.size);
  }
  
+static int process_locked_synthesized_event(struct perf_tool *tool,
+                                    union perf_event *event,
+                                    struct perf_sample *sample __maybe_unused,
+                                    struct machine *machine __maybe_unused)
+{ /* Mutex-guarded wrapper around process_synthesized_event(); selected as the synthesis callback when nr_threads_synthesize > 1. */
+       static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER; /* function-local static: a single lock shared by every call */
+       int ret;
+
+       pthread_mutex_lock(&synth_lock); /* only one caller at a time may run the unlocked handler */
+       ret = process_synthesized_event(tool, event, sample, machine); /* all arguments are forwarded unchanged */
+       pthread_mutex_unlock(&synth_lock);
+       return ret; /* result of process_synthesized_event(), passed through as-is */
+}
+
  static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
  {
         struct record *rec = to;
@@ -1288,6 +1304,7 @@ static int record__synthesize(struct record *rec, bool tail)
         struct perf_tool *tool = &rec->tool;
         int fd = perf_data__fd(data);
         int err = 0;
+       event_op f = process_synthesized_event;
  
         if (rec->opts.tail_synthesize != tail)
                 return 0;
@@ -1402,9 +1419,18 @@ static int record__synthesize(struct record *rec, bool tail)
         if (err < 0)
                 pr_warning("Couldn't synthesize cgroup events.\n");
  
+       if (rec->opts.nr_threads_synthesize > 1) {
+               perf_set_multithreaded();
+               f = process_locked_synthesized_event;
+       }
+
         err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
-                                           process_synthesized_event, opts->sample_address,
-                                           1);
+                                           f, opts->sample_address,
+                                           rec->opts.nr_threads_synthesize);
+
+       if (rec->opts.nr_threads_synthesize > 1)
+               perf_set_singlethreaded();
+
  out:
         return err;
  }
@@ -2232,6 +2258,7 @@ static struct record record = {
                         .default_per_cpu = true,
                 },
                 .mmap_flush          = MMAP_FLUSH_DEFAULT,
+               .nr_threads_synthesize = 1,
         },
         .tool = {
                 .sample         = process_sample_event,
@@ -2421,6 +2448,9 @@ static struct option __record_options[] = {
  #endif
         OPT_CALLBACK(0, "max-size", &record.output_max_size,
                      "size", "Limit the maximum size of the output file", parse_output_max_size),
+       OPT_UINTEGER(0, "num-thread-synthesize",
+                    &record.opts.nr_threads_synthesize,
+                    "number of threads to run for event synthesis"),
         OPT_END()
  };
  
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h

index 2431645..923565c 100644 (file)
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -68,6 +68,7 @@ struct record_opts {
         int           affinity;
         int           mmap_flush;
         unsigned int  comp_level;
+       unsigned int  nr_threads_synthesize;
  };
  
  extern const char * const *record_usage;
author	Stephane Eranian <eranian@google.com>
	Wed, 22 Apr 2020 15:50:38 +0000 (08:50 -0700)
committer	Arnaldo Carvalho de Melo <acme@redhat.com>
	Thu, 23 Apr 2020 14:10:41 +0000 (11:10 -0300)
tools/perf/Documentation/perf-record.txt		patch \| blob \| history
tools/perf/builtin-record.c		patch \| blob \| history
tools/perf/util/record.h		patch \| blob \| history