/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <ctype.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>
#include <unistd.h>

#include <map>
#include <vector>

#include "bandwidth.h"
35 option_t bandwidth_opts[] = {
37 { "num_warm_loops", true },
38 { "num_loops", true },
43 option_t per_core_opts[] = {
45 { "num_warm_loops", true},
46 { "num_loops", true },
51 option_t multithread_opts[] = {
53 { "num_warm_loops", true},
54 { "num_loops", true },
56 { "num_threads", true },
// Value of one parsed command line option. Which member is meaningful is
// decided by the option's int_type flag in the option tables.
typedef union {
    int int_value;
    const char *char_value;
} arg_value_t;

// Orders option names by their text rather than by their address, so that a
// lookup such as values["size"] finds the entry no matter which copy of the
// string was used as the key when the entry was inserted.
struct arg_name_less {
    bool operator()(const char *lhs, const char *rhs) const {
        return strcmp(lhs, rhs) < 0;
    }
};

// Parsed options, keyed by option name.
typedef std::map<const char*, arg_value_t, arg_name_less> arg_t;
66 bool processBandwidthOptions(int argc, char** argv, option_t options[],
68 for (int i = 1; i < argc; i++) {
69 if (argv[i][0] == '-' && argv[i][1] == '-' && !isdigit(argv[i][2])) {
70 char *arg = &argv[i][2];
72 for (int j = 0; options[j].name != NULL; j++) {
73 if (strcmp(arg, options[j].name) == 0) {
74 const char *name = options[j].name;
76 printf("The option --%s requires an argument.\n", name);
79 if (options[j].int_type) {
80 (*values)[name].int_value = strtol(argv[++i], NULL, 0);
82 (*values)[name].char_value = argv[++i];
92 BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) {
93 BandwidthBenchmark *bench = NULL;
95 const char *name = values["type"].char_value;
97 if (values.count("size") > 0) {
98 size = values["size"].int_value;
100 if (strcmp(name, "copy_ldrd_strd") == 0) {
101 bench = new CopyLdrdStrdBenchmark();
102 } else if (strcmp(name, "copy_ldmia_stmia") == 0) {
103 bench = new CopyLdmiaStmiaBenchmark();
104 } else if (strcmp(name, "copy_vld_vst") == 0) {
105 bench = new CopyVldVstBenchmark();
106 } else if (strcmp(name, "copy_vldmia_vstmia") == 0) {
107 bench = new CopyVldmiaVstmiaBenchmark();
108 } else if (strcmp(name, "memcpy") == 0) {
109 bench = new MemcpyBenchmark();
110 } else if (strcmp(name, "write_strd") == 0) {
111 bench = new WriteStrdBenchmark();
112 } else if (strcmp(name, "write_stmia") == 0) {
113 bench = new WriteStmiaBenchmark();
114 } else if (strcmp(name, "write_vst") == 0) {
115 bench = new WriteVstBenchmark();
116 } else if (strcmp(name, "write_vstmia") == 0) {
117 bench = new WriteVstmiaBenchmark();
118 } else if (strcmp(name, "memset") == 0) {
119 bench = new MemsetBenchmark();
120 } else if (strcmp(name, "read_ldrd") == 0) {
121 bench = new ReadLdrdBenchmark();
122 } else if (strcmp(name, "read_ldmia") == 0) {
123 bench = new ReadLdmiaBenchmark();
124 } else if (strcmp(name, "read_vld") == 0) {
125 bench = new ReadVldBenchmark();
126 } else if (strcmp(name, "read_vldmia") == 0) {
127 bench = new ReadVldmiaBenchmark();
129 printf("Unknown type name %s\n", name);
133 if (!bench->setSize(values["size"].int_value)) {
134 printf("Failed to allocate buffers for benchmark.\n");
138 if (values.count("num_warm_loops") > 0) {
139 bench->set_num_loops(values["num_warm_loops"].int_value);
141 if (values.count("num_loops") > 0) {
142 bench->set_num_loops(values["num_loops"].int_value);
// Appends to |cpu_list| the index of every cpu set in the affinity mask that
// sched_getaffinity() reports for pid 0, in increasing order.
//
// Returns false, after reporting the error with perror(), when the mask
// cannot be read; |cpu_list| is left untouched in that case.
bool getAvailCpus(std::vector<int> *cpu_list) {
    cpu_set_t allowed;
    CPU_ZERO(&allowed);

    const int rc = sched_getaffinity(0, sizeof(allowed), &allowed);
    if (rc != 0) {
        perror("sched_getaffinity failed.");
        return false;
    }

    int cpu = 0;
    while (cpu < CPU_SETSIZE) {
        if (CPU_ISSET(cpu, &allowed)) {
            cpu_list->push_back(cpu);
        }
        ++cpu;
    }

    return true;
}
168 BandwidthBenchmark *bench;
173 void *runBandwidthThread(void *data) {
174 thread_arg_t *arg = reinterpret_cast<thread_arg_t *>(data);
176 if (arg->core >= 0) {
179 CPU_SET(arg->core, &cpuset);
180 if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
181 perror("sched_setaffinity failed");
186 // Spinloop waiting for the run variable to get set to true.
191 for (int run = 1; ; run++) {
194 // Throw away the last data point since it's possible not
195 // all of the threads are running at this point.
198 avg_mb = (avg_mb/run) * (run-1) + arg->bench->mb_per_sec()/run;
200 arg->avg_mb = avg_mb;
205 bool processThreadArgs(int argc, char** argv, option_t options[],
207 // Use some smaller values for the number of loops.
208 (*values)["num_warm_loops"].int_value = 1000000;
209 (*values)["num_loops"].int_value = 10000000;
211 if (!processBandwidthOptions(argc, argv, options, values)) {
214 if (values->count("size") > 0 && ((*values)["size"].int_value % 64) != 0) {
215 printf("The size values must be a multiple of 64.\n");
218 if (values->count("type") == 0) {
219 printf("Must specify the type value.\n");
223 BandwidthBenchmark *bench = createBandwidthBenchmarkObject(*values);
228 if (setpriority(PRIO_PROCESS, 0, -20)) {
229 perror("Unable to raise priority of process.");
233 printf("Calculating optimum run time...\n");
234 nsecs_t t = system_time();
236 t = system_time() - t;
237 // Since this is only going to be running single threaded, assume that
238 // if the number is set to ten times this value, we should get at least
239 // a couple of samples per thread.
240 int run_time = int((t/1000000000.0)*10 + 0.5) + 5;
242 (*values)["run_time"].int_value = run_time;
243 (*values)["size"].int_value = bench->size();
244 (*values)["num_warm_loops"].int_value = bench->num_warm_loops();
245 (*values)["num_loops"].int_value = bench->num_loops();
251 bool runThreadedTest(thread_arg_t args[], int num_threads, int run_time) {
252 pthread_t threads[num_threads];
253 volatile bool run = false;
256 for (int i = 0; i < num_threads; i++) {
258 rc = pthread_create(&threads[i], NULL, runBandwidthThread,
261 printf("Failed to launch thread %d\n", i);
266 // Kick start the threads.
269 // Let the threads run.
275 // Wait for the threads to complete.
276 for (int i = 0; i < num_threads; i++) {
277 rc = pthread_join(threads[i], NULL);
279 printf("Thread %d failed to join.\n", i);
282 printf("Thread %d: bandwidth using %s %0.2f MB/s\n", i,
283 args[i].bench->getName(), args[i].avg_mb);
289 int per_core_bandwidth(int argc, char** argv) {
291 if (!processThreadArgs(argc, argv, per_core_opts, &values)) {
295 std::vector<int> cpu_list;
296 if (!getAvailCpus(&cpu_list)) {
297 printf("Failed to get available cpu list.\n");
301 thread_arg_t args[cpu_list.size()];
304 for (std::vector<int>::iterator it = cpu_list.begin();
305 it != cpu_list.end(); ++it, ++i) {
307 args[i].bench = createBandwidthBenchmarkObject(values);
308 if (!args[i].bench) {
313 printf("Running on %d cores\n", cpu_list.size());
314 printf(" run_time = %ds\n", values["run_time"].int_value);
315 printf(" size = %d\n", values["size"].int_value);
316 printf(" num_warm_loops = %d\n", values["num_warm_loops"].int_value);
317 printf(" num_loops = %d\n", values["num_loops"].int_value);
320 if (!runThreadedTest(args, cpu_list.size(), values["run_time"].int_value)) {
327 int multithread_bandwidth(int argc, char** argv) {
329 if (!processThreadArgs(argc, argv, multithread_opts, &values)) {
332 if (values.count("num_threads") == 0) {
333 printf("Must specify the num_threads value.\n");
336 int num_threads = values["num_threads"].int_value;
338 thread_arg_t args[num_threads];
341 for (int i = 0; i < num_threads; i++) {
343 args[i].bench = createBandwidthBenchmarkObject(values);
344 if (!args[i].bench) {
349 printf("Running %d threads\n", num_threads);
350 printf(" run_time = %ds\n", values["run_time"].int_value);
351 printf(" size = %d\n", values["size"].int_value);
352 printf(" num_warm_loops = %d\n", values["num_warm_loops"].int_value);
353 printf(" num_loops = %d\n", values["num_loops"].int_value);
356 if (!runThreadedTest(args, num_threads, values["run_time"].int_value)) {
363 bool run_bandwidth_benchmark(int argc, char** argv, const char *name,
364 std::vector<BandwidthBenchmark*> bench_objs) {
366 values["size"].int_value = 0;
367 values["num_warm_loops"].int_value = 0;
368 values["num_loops"].int_value = 0;
369 if (!processBandwidthOptions(argc, argv, bandwidth_opts, &values)) {
373 size_t size = values["size"].int_value;
374 if ((size % 64) != 0) {
375 printf("The size value must be a multiple of 64.\n");
379 if (setpriority(PRIO_PROCESS, 0, -20)) {
380 perror("Unable to raise priority of process.");
384 bool preamble_printed = false;
385 size_t num_warm_loops = values["num_warm_loops"].int_value;
386 size_t num_loops = values["num_loops"].int_value;
387 for (std::vector<BandwidthBenchmark*>::iterator it = bench_objs.begin();
388 it != bench_objs.end(); ++it) {
389 if (!(*it)->canRun()) {
392 if (!(*it)->setSize(values["num_warm_loops"].int_value)) {
393 printf("Failed creating buffer for bandwidth test.\n");
396 if (num_warm_loops) {
397 (*it)->set_num_warm_loops(num_warm_loops);
400 (*it)->set_num_loops(num_loops);
402 if (!preamble_printed) {
403 preamble_printed = true;
404 printf("Benchmarking %s bandwidth\n", name);
405 printf(" size = %d\n", (*it)->size());
406 printf(" num_warm_loops = %d\n", (*it)->num_warm_loops());
407 printf(" num_loops = %d\n\n", (*it)->num_loops());
410 printf(" %s bandwidth with %s: %0.2f MB/s\n", name, (*it)->getName(),
411 (*it)->mb_per_sec());
417 int copy_bandwidth(int argc, char** argv) {
418 std::vector<BandwidthBenchmark*> bench_objs;
419 bench_objs.push_back(new CopyLdrdStrdBenchmark());
420 bench_objs.push_back(new CopyLdmiaStmiaBenchmark());
421 bench_objs.push_back(new CopyVldVstBenchmark());
422 bench_objs.push_back(new CopyVldmiaVstmiaBenchmark());
423 bench_objs.push_back(new MemcpyBenchmark());
425 if (!run_bandwidth_benchmark(argc, argv, "copy", bench_objs)) {
431 int write_bandwidth(int argc, char** argv) {
432 std::vector<BandwidthBenchmark*> bench_objs;
433 bench_objs.push_back(new WriteStrdBenchmark());
434 bench_objs.push_back(new WriteStmiaBenchmark());
435 bench_objs.push_back(new WriteVstBenchmark());
436 bench_objs.push_back(new WriteVstmiaBenchmark());
437 bench_objs.push_back(new MemsetBenchmark());
439 if (!run_bandwidth_benchmark(argc, argv, "write", bench_objs)) {
446 int read_bandwidth(int argc, char** argv) {
447 std::vector<BandwidthBenchmark*> bench_objs;
448 bench_objs.push_back(new ReadLdrdBenchmark());
449 bench_objs.push_back(new ReadLdmiaBenchmark());
450 bench_objs.push_back(new ReadVldBenchmark());
451 bench_objs.push_back(new ReadVldmiaBenchmark());
453 if (!run_bandwidth_benchmark(argc, argv, "read", bench_objs)) {