This is really only useful for 32 bit devices, and it's not all that useful.
It was designed to check the different memory instructions and indicate which
would likely give the best results for memcpy/memset.
However, we aren't doing any active performance improvements for these 32 bit
routines any more and it can be confusing that this is 32 bit only, so it's
time to go.
Test: NA
Change-Id: Ib56a2ca0a603ac9477422431c3c13f3112366473
+++ /dev/null
-// Copyright 2006 The Android Open Source Project
-
-cc_binary {
- name: "memtest",
-
- srcs: [
- "memtest.cpp",
- "fptest.cpp",
- "thumb.cpp",
- "bandwidth.cpp",
- ],
-
- cflags: [
- "-fomit-frame-pointer",
- "-Wall",
- "-Werror",
- ],
-
- sanitize: {
- never: true,
- },
-
- enabled: false,
- arch: {
- arm: {
- enabled: true,
- instruction_set: "thumb",
- },
- },
-
-}
+++ /dev/null
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "bandwidth.h"
-
-#include <ctype.h>
-#include <pthread.h>
-#include <sched.h>
-#include <sys/resource.h>
-#include <sys/time.h>
-#include <unistd.h>
-
-#include <map>
-#include <memory>
-#include <vector>
-
-
-typedef struct {
- const char *name;
- bool int_type;
-} option_t;
-
-option_t bandwidth_opts[] = {
- { "size", true },
- { "num_warm_loops", true },
- { "num_loops", true },
- { "type", false },
- { NULL, false },
-};
-
-option_t per_core_opts[] = {
- { "size", true },
- { "num_warm_loops", true},
- { "num_loops", true },
- { "type", false },
- { NULL, false },
-};
-
-option_t multithread_opts[] = {
- { "size", true },
- { "num_warm_loops", true},
- { "num_loops", true },
- { "type", false },
- { "num_threads", true },
- { NULL, false },
-};
-
-typedef union {
- int int_value;
- const char *char_value;
-} arg_value_t;
-typedef std::map<const char*, arg_value_t> arg_t;
-
-bool processBandwidthOptions(int argc, char** argv, option_t options[],
- arg_t *values) {
- for (int i = 1; i < argc; i++) {
- if (argv[i][0] == '-' && argv[i][1] == '-' && !isdigit(argv[i][2])) {
- char *arg = &argv[i][2];
-
- for (int j = 0; options[j].name != NULL; j++) {
- if (strcmp(arg, options[j].name) == 0) {
- const char *name = options[j].name;
- if (i == argc - 1) {
- printf("The option --%s requires an argument.\n", name);
- return false;
- }
- if (options[j].int_type) {
- (*values)[name].int_value = strtol(argv[++i], NULL, 0);
- } else {
- (*values)[name].char_value = argv[++i];
- }
- }
- }
- }
- }
-
- return true;
-}
-
-BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) {
- BandwidthBenchmark *bench = NULL;
-
- const char *name = values["type"].char_value;
- size_t size = 0;
- if (values.count("size") > 0) {
- size = values["size"].int_value;
- }
- if (strcmp(name, "copy_ldrd_strd") == 0) {
- bench = new CopyLdrdStrdBenchmark();
- } else if (strcmp(name, "copy_ldmia_stmia") == 0) {
- bench = new CopyLdmiaStmiaBenchmark();
- } else if (strcmp(name, "copy_vld1_vst1") == 0) {
- bench = new CopyVld1Vst1Benchmark();
- } else if (strcmp(name, "copy_vldr_vstr") == 0) {
- bench = new CopyVldrVstrBenchmark();
- } else if (strcmp(name, "copy_vldmia_vstmia") == 0) {
- bench = new CopyVldmiaVstmiaBenchmark();
- } else if (strcmp(name, "memcpy") == 0) {
- bench = new MemcpyBenchmark();
- } else if (strcmp(name, "write_strd") == 0) {
- bench = new WriteStrdBenchmark();
- } else if (strcmp(name, "write_stmia") == 0) {
- bench = new WriteStmiaBenchmark();
- } else if (strcmp(name, "write_vst1") == 0) {
- bench = new WriteVst1Benchmark();
- } else if (strcmp(name, "write_vstr") == 0) {
- bench = new WriteVstrBenchmark();
- } else if (strcmp(name, "write_vstmia") == 0) {
- bench = new WriteVstmiaBenchmark();
- } else if (strcmp(name, "memset") == 0) {
- bench = new MemsetBenchmark();
- } else if (strcmp(name, "read_ldrd") == 0) {
- bench = new ReadLdrdBenchmark();
- } else if (strcmp(name, "read_ldmia") == 0) {
- bench = new ReadLdmiaBenchmark();
- } else if (strcmp(name, "read_vld1") == 0) {
- bench = new ReadVld1Benchmark();
- } else if (strcmp(name, "read_vldr") == 0) {
- bench = new ReadVldrBenchmark();
- } else if (strcmp(name, "read_vldmia") == 0) {
- bench = new ReadVldmiaBenchmark();
- } else {
- printf("Unknown type name %s\n", name);
- return NULL;
- }
-
- if (!bench->setSize(size)) {
- printf("Failed to allocate buffers for benchmark.\n");
- delete bench;
- return NULL;
- }
-
- if (values.count("num_warm_loops") > 0) {
- bench->set_num_loops(values["num_warm_loops"].int_value);
- }
- if (values.count("num_loops") > 0) {
- bench->set_num_loops(values["num_loops"].int_value);
- }
-
- return bench;
-}
-
-bool getAvailCpus(std::vector<int> *cpu_list) {
- cpu_set_t cpuset;
-
- CPU_ZERO(&cpuset);
- if (sched_getaffinity(0, sizeof(cpuset), &cpuset) != 0) {
- perror("sched_getaffinity failed.");
- return false;
- }
-
- for (int i = 0; i < CPU_SETSIZE; i++) {
- if (CPU_ISSET(i, &cpuset)) {
- cpu_list->push_back(i);
- }
- }
-
- return true;
-}
-
-typedef struct {
- int core;
- BandwidthBenchmark *bench;
- double avg_mb;
- volatile bool *run;
-} thread_arg_t;
-
-void *runBandwidthThread(void *data) {
- thread_arg_t *arg = reinterpret_cast<thread_arg_t *>(data);
-
- if (arg->core >= 0) {
- cpu_set_t cpuset;
- CPU_ZERO(&cpuset);
- CPU_SET(arg->core, &cpuset);
- if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
- perror("sched_setaffinity failed");
- return NULL;
- }
- }
-
- // Spinloop waiting for the run variable to get set to true.
- while (!*arg->run) {
- }
-
- double avg_mb = 0;
- for (int run = 1; ; run++) {
- arg->bench->run();
- if (!*arg->run) {
- // Throw away the last data point since it's possible not
- // all of the threads are running at this point.
- break;
- }
- avg_mb = (avg_mb/run) * (run-1) + arg->bench->mb_per_sec()/run;
- }
- arg->avg_mb = avg_mb;
-
- return NULL;
-}
-
-bool processThreadArgs(int argc, char** argv, option_t options[],
- arg_t *values) {
- // Use some smaller values for the number of loops.
- (*values)["num_warm_loops"].int_value = 1000000;
- (*values)["num_loops"].int_value = 10000000;
-
- if (!processBandwidthOptions(argc, argv, options, values)) {
- return false;
- }
- if (values->count("size") > 0 && ((*values)["size"].int_value % 64) != 0) {
- printf("The size values must be a multiple of 64.\n");
- return false;
- }
- if (values->count("type") == 0) {
- printf("Must specify the type value.\n");
- return false;
- }
-
- std::unique_ptr<BandwidthBenchmark> bench{
- createBandwidthBenchmarkObject(*values)};
- if (!bench) {
- return false;
- }
-
- if (setpriority(PRIO_PROCESS, 0, -20)) {
- perror("Unable to raise priority of process.");
- return false;
- }
-
- printf("Calculating optimum run time...\n");
- nsecs_t t = system_time();
- bench->run();
- t = system_time() - t;
- // Since this is only going to be running single threaded, assume that
- // if the number is set to ten times this value, we should get at least
- // a couple of samples per thread.
- int run_time = int((t/1000000000.0)*10 + 0.5) + 5;
-
- (*values)["run_time"].int_value = run_time;
- (*values)["size"].int_value = bench->size();
- (*values)["num_warm_loops"].int_value = bench->num_warm_loops();
- (*values)["num_loops"].int_value = bench->num_loops();
-
- return true;
-}
-
-bool runThreadedTest(thread_arg_t args[], int num_threads, int run_time) {
- pthread_t threads[num_threads];
- volatile bool run = false;
-
- int rc;
- for (int i = 0; i < num_threads; i++) {
- args[i].run = &run;
- rc = pthread_create(&threads[i], NULL, runBandwidthThread,
- (void*)&args[i]);
- if (rc != 0) {
- printf("Failed to launch thread %d\n", i);
- return false;
- }
- }
-
- // Kick start the threads.
- run = true;
-
- // Let the threads run.
- sleep(run_time);
-
- // Stop the threads.
- run = false;
-
- // Wait for the threads to complete.
- for (int i = 0; i < num_threads; i++) {
- rc = pthread_join(threads[i], NULL);
- if (rc != 0) {
- printf("Thread %d failed to join.\n", i);
- return false;
- }
- printf("Thread %d: bandwidth using %s %0.2f MB/s\n", i,
- args[i].bench->getName(), args[i].avg_mb);
- }
-
- return true;
-}
-
-int per_core_bandwidth(int argc, char** argv) {
- arg_t values;
- if (!processThreadArgs(argc, argv, per_core_opts, &values)) {
- return -1;
- }
-
- std::vector<int> cpu_list;
- if (!getAvailCpus(&cpu_list)) {
- printf("Failed to get available cpu list.\n");
- return -1;
- }
-
- thread_arg_t args[cpu_list.size()];
-
- int i = 0;
- for (std::vector<int>::iterator it = cpu_list.begin();
- it != cpu_list.end(); ++it, ++i) {
- args[i].core = *it;
- args[i].bench = createBandwidthBenchmarkObject(values);
- if (!args[i].bench) {
- for (int j = 0; j < i; j++)
- delete args[j].bench;
- return -1;
- }
- }
-
- printf("Running on %d cores\n", cpu_list.size());
- printf(" run_time = %ds\n", values["run_time"].int_value);
- printf(" size = %d\n", values["size"].int_value);
- printf(" num_warm_loops = %d\n", values["num_warm_loops"].int_value);
- printf(" num_loops = %d\n", values["num_loops"].int_value);
- printf("\n");
-
- if (!runThreadedTest(args, cpu_list.size(), values["run_time"].int_value)) {
- return -1;
- }
-
- return 0;
-}
-
-int multithread_bandwidth(int argc, char** argv) {
- arg_t values;
- if (!processThreadArgs(argc, argv, multithread_opts, &values)) {
- return -1;
- }
- if (values.count("num_threads") == 0) {
- printf("Must specify the num_threads value.\n");
- return -1;
- }
- int num_threads = values["num_threads"].int_value;
-
- thread_arg_t args[num_threads];
-
- for (int i = 0; i < num_threads; i++) {
- args[i].core = -1;
- args[i].bench = createBandwidthBenchmarkObject(values);
- if (!args[i].bench) {
- for (int j = 0; j < i; j++)
- delete args[j].bench;
- return -1;
- }
- }
-
- printf("Running %d threads\n", num_threads);
- printf(" run_time = %ds\n", values["run_time"].int_value);
- printf(" size = %d\n", values["size"].int_value);
- printf(" num_warm_loops = %d\n", values["num_warm_loops"].int_value);
- printf(" num_loops = %d\n", values["num_loops"].int_value);
- printf("\n");
-
- if (!runThreadedTest(args, num_threads, values["run_time"].int_value)) {
- return -1;
- }
-
- return 0;
-}
-
-bool run_bandwidth_benchmark(int argc, char** argv, const char *name,
- std::vector<BandwidthBenchmark*> bench_objs) {
- arg_t values;
- values["size"].int_value = 0;
- values["num_warm_loops"].int_value = 0;
- values["num_loops"].int_value = 0;
- if (!processBandwidthOptions(argc, argv, bandwidth_opts, &values)) {
- return false;
- }
-
- size_t size = values["size"].int_value;
- if ((size % 64) != 0) {
- printf("The size value must be a multiple of 64.\n");
- return false;
- }
-
- if (setpriority(PRIO_PROCESS, 0, -20)) {
- perror("Unable to raise priority of process.");
- return false;
- }
-
- bool preamble_printed = false;
- size_t num_warm_loops = values["num_warm_loops"].int_value;
- size_t num_loops = values["num_loops"].int_value;
- for (std::vector<BandwidthBenchmark*>::iterator it = bench_objs.begin();
- it != bench_objs.end(); ++it) {
- if (!(*it)->canRun()) {
- continue;
- }
- if (!(*it)->setSize(values["size"].int_value)) {
- printf("Failed creating buffer for bandwidth test.\n");
- return false;
- }
- if (num_warm_loops) {
- (*it)->set_num_warm_loops(num_warm_loops);
- }
- if (num_loops) {
- (*it)->set_num_loops(num_loops);
- }
- if (!preamble_printed) {
- preamble_printed = true;
- printf("Benchmarking %s bandwidth\n", name);
- printf(" size = %d\n", (*it)->size());
- printf(" num_warm_loops = %d\n", (*it)->num_warm_loops());
- printf(" num_loops = %d\n\n", (*it)->num_loops());
- }
- (*it)->run();
- printf(" %s bandwidth with %s: %0.2f MB/s\n", name, (*it)->getName(),
- (*it)->mb_per_sec());
- }
-
- return true;
-}
-
-int copy_bandwidth(int argc, char** argv) {
- std::vector<BandwidthBenchmark*> bench_objs;
- bench_objs.push_back(new CopyLdrdStrdBenchmark());
- bench_objs.push_back(new CopyLdmiaStmiaBenchmark());
- bench_objs.push_back(new CopyVld1Vst1Benchmark());
- bench_objs.push_back(new CopyVldrVstrBenchmark());
- bench_objs.push_back(new CopyVldmiaVstmiaBenchmark());
- bench_objs.push_back(new MemcpyBenchmark());
-
- if (!run_bandwidth_benchmark(argc, argv, "copy", bench_objs)) {
- return -1;
- }
- return 0;
-}
-
-int write_bandwidth(int argc, char** argv) {
- std::vector<BandwidthBenchmark*> bench_objs;
- bench_objs.push_back(new WriteStrdBenchmark());
- bench_objs.push_back(new WriteStmiaBenchmark());
- bench_objs.push_back(new WriteVst1Benchmark());
- bench_objs.push_back(new WriteVstrBenchmark());
- bench_objs.push_back(new WriteVstmiaBenchmark());
- bench_objs.push_back(new MemsetBenchmark());
-
- if (!run_bandwidth_benchmark(argc, argv, "write", bench_objs)) {
- return -1;
- }
-
- return 0;
-}
-
-int read_bandwidth(int argc, char** argv) {
- std::vector<BandwidthBenchmark*> bench_objs;
- bench_objs.push_back(new ReadLdrdBenchmark());
- bench_objs.push_back(new ReadLdmiaBenchmark());
- bench_objs.push_back(new ReadVld1Benchmark());
- bench_objs.push_back(new ReadVldrBenchmark());
- bench_objs.push_back(new ReadVldmiaBenchmark());
-
- if (!run_bandwidth_benchmark(argc, argv, "read", bench_objs)) {
- return -1;
- }
- return 0;
-}
+++ /dev/null
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __BANDWIDTH_H__
-#define __BANDWIDTH_H__
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "utils/Compat.h"
-#include "memtest.h"
-
-// Bandwidth Class definitions.
-class BandwidthBenchmark {
-public:
- BandwidthBenchmark()
- : _size(0),
- _num_warm_loops(DEFAULT_NUM_WARM_LOOPS),
- _num_loops(DEFAULT_NUM_LOOPS) {}
- virtual ~BandwidthBenchmark() {}
-
- bool run() {
- if (_size == 0) {
- return false;
- }
- if (!canRun()) {
- return false;
- }
-
- bench(_num_warm_loops);
-
- nsecs_t t = system_time();
- bench(_num_loops);
- t = system_time() - t;
-
- _mb_per_sec = (_size*(_num_loops/_BYTES_PER_MB))/(t/_NUM_NS_PER_SEC);
-
- return true;
- }
-
- bool canRun() { return !usesNeon() || isNeonSupported(); }
-
- virtual bool setSize(size_t size) = 0;
-
- virtual const char *getName() = 0;
-
- virtual bool verify() = 0;
-
- virtual bool usesNeon() { return false; }
-
- bool isNeonSupported() {
-#if defined(__ARM_NEON__)
- return true;
-#else
- return false;
-#endif
- }
-
- // Accessors/mutators.
- double mb_per_sec() { return _mb_per_sec; }
- size_t num_warm_loops() { return _num_warm_loops; }
- size_t num_loops() { return _num_loops; }
- size_t size() { return _size; }
-
- void set_num_warm_loops(size_t num_warm_loops) {
- _num_warm_loops = num_warm_loops;
- }
- void set_num_loops(size_t num_loops) { _num_loops = num_loops; }
-
- // Static constants
- static const unsigned int DEFAULT_NUM_WARM_LOOPS = 1000000;
- static const unsigned int DEFAULT_NUM_LOOPS = 20000000;
-
-protected:
- virtual void bench(size_t num_loops) = 0;
-
- double _mb_per_sec;
- size_t _size;
- size_t _num_warm_loops;
- size_t _num_loops;
-
-private:
- // Static constants
- static const CONSTEXPR double _NUM_NS_PER_SEC = 1000000000.0;
- static const CONSTEXPR double _BYTES_PER_MB = 1024.0* 1024.0;
-};
-
-class CopyBandwidthBenchmark : public BandwidthBenchmark {
-public:
- CopyBandwidthBenchmark() : BandwidthBenchmark(), _src(NULL), _dst(NULL) { }
-
- bool setSize(size_t size) {
- if (_src) {
- free(_src);
- _src = NULL;
- }
- if (_dst) {
- free(_dst);
- _dst = NULL;
- }
-
- if (size == 0) {
- _size = DEFAULT_COPY_SIZE;
- } else {
- _size = size;
- }
-
- _src = reinterpret_cast<char*>(memalign(64, _size));
- if (!_src) {
- perror("Failed to allocate memory for test.");
- return false;
- }
- _dst = reinterpret_cast<char*>(memalign(64, _size));
- if (!_dst) {
- perror("Failed to allocate memory for test.");
- return false;
- }
-
- return true;
- }
- virtual ~CopyBandwidthBenchmark() {
- if (_src) {
- free(_src);
- _src = NULL;
- }
- if (_dst) {
- free(_dst);
- _dst = NULL;
- }
- }
-
- bool verify() {
- memset(_src, 0x23, _size);
- memset(_dst, 0, _size);
- bench(1);
- if (memcmp(_src, _dst, _size) != 0) {
- printf("Buffers failed to compare after one loop.\n");
- return false;
- }
-
- memset(_src, 0x23, _size);
- memset(_dst, 0, _size);
- _num_loops = 2;
- bench(2);
- if (memcmp(_src, _dst, _size) != 0) {
- printf("Buffers failed to compare after two loops.\n");
- return false;
- }
-
- return true;
- }
-
-protected:
- char *_src;
- char *_dst;
-
- static const unsigned int DEFAULT_COPY_SIZE = 8000;
-};
-
-class CopyLdrdStrdBenchmark : public CopyBandwidthBenchmark {
-public:
- CopyLdrdStrdBenchmark() : CopyBandwidthBenchmark() { }
- virtual ~CopyLdrdStrdBenchmark() {}
-
- const char *getName() { return "ldrd/strd"; }
-
-protected:
- // Copy using ldrd/strd instructions.
- void bench(size_t num_loops) {
- asm volatile(
- "stmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
-
- "mov r0, %0\n"
- "mov r1, %1\n"
- "mov r2, %2\n"
- "mov r3, %3\n"
-
- "0:\n"
- "mov r4, r2, lsr #6\n"
-
- "1:\n"
- "ldrd r6, r7, [r0]\n"
- "strd r6, r7, [r1]\n"
- "ldrd r6, r7, [r0, #8]\n"
- "strd r6, r7, [r1, #8]\n"
- "ldrd r6, r7, [r0, #16]\n"
- "strd r6, r7, [r1, #16]\n"
- "ldrd r6, r7, [r0, #24]\n"
- "strd r6, r7, [r1, #24]\n"
- "ldrd r6, r7, [r0, #32]\n"
- "strd r6, r7, [r1, #32]\n"
- "ldrd r6, r7, [r0, #40]\n"
- "strd r6, r7, [r1, #40]\n"
- "ldrd r6, r7, [r0, #48]\n"
- "strd r6, r7, [r1, #48]\n"
- "ldrd r6, r7, [r0, #56]\n"
- "strd r6, r7, [r1, #56]\n"
-
- "add r0, r0, #64\n"
- "add r1, r1, #64\n"
- "subs r4, r4, #1\n"
- "bgt 1b\n"
-
- "sub r0, r0, r2\n"
- "sub r1, r1, r2\n"
- "subs r3, r3, #1\n"
- "bgt 0b\n"
-
- "ldmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
- :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
- }
-};
-
-class CopyLdmiaStmiaBenchmark : public CopyBandwidthBenchmark {
-public:
- CopyLdmiaStmiaBenchmark() : CopyBandwidthBenchmark() { }
- virtual ~CopyLdmiaStmiaBenchmark() {}
-
- const char *getName() { return "ldmia/stmia"; }
-
-protected:
- // Copy using ldmia/stmia instructions.
- void bench(size_t num_loops) {
- asm volatile(
- "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
-
- "mov r0, %0\n"
- "mov r1, %1\n"
- "mov r2, %2\n"
- "mov r3, %3\n"
-
- "0:\n"
- "mov r4, r2, lsr #6\n"
-
- "1:\n"
- "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
- "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
- "subs r4, r4, #1\n"
- "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
- "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
- "bgt 1b\n"
-
- "sub r0, r0, r2\n"
- "sub r1, r1, r2\n"
- "subs r3, r3, #1\n"
- "bgt 0b\n"
-
- "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
- :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
- }
-};
-
-class CopyVld1Vst1Benchmark : public CopyBandwidthBenchmark {
-public:
- CopyVld1Vst1Benchmark() : CopyBandwidthBenchmark() { }
- virtual ~CopyVld1Vst1Benchmark() {}
-
- const char *getName() { return "vld1/vst1"; }
-
- bool usesNeon() { return true; }
-
-protected:
- // Copy using vld1/vst1 instructions.
-#if defined(__ARM_NEON__)
- void bench(size_t num_loops) {
- asm volatile(
- "stmfd sp!, {r0,r1,r2,r3,r4}\n"
-
- "mov r0, %0\n"
- "mov r1, %1\n"
- "mov r2, %2\n"
- "mov r3, %3\n"
-
- "0:\n"
- "mov r4, r2, lsr #6\n"
-
- "1:\n"
- "vld1.8 {d0-d3}, [r0]!\n"
- "vld1.8 {d4-d7}, [r0]!\n"
- "subs r4, r4, #1\n"
- "vst1.8 {d0-d3}, [r1:128]!\n"
- "vst1.8 {d4-d7}, [r1:128]!\n"
- "bgt 1b\n"
-
- "sub r0, r0, r2\n"
- "sub r1, r1, r2\n"
- "subs r3, r3, #1\n"
- "bgt 0b\n"
-
- "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
- :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
-#else
- void bench(size_t) {
-#endif
- }
-};
-
-class CopyVldrVstrBenchmark : public CopyBandwidthBenchmark {
-public:
- CopyVldrVstrBenchmark() : CopyBandwidthBenchmark() { }
- virtual ~CopyVldrVstrBenchmark() {}
-
- const char *getName() { return "vldr/vstr"; }
-
- bool usesNeon() { return true; }
-
-protected:
- // Copy using vldr/vstr instructions.
-#if defined(__ARM_NEON__)
- void bench(size_t num_loops) {
- asm volatile(
- "stmfd sp!, {r0,r1,r2,r3,r4}\n"
-
- "mov r0, %0\n"
- "mov r1, %1\n"
- "mov r2, %2\n"
- "mov r3, %3\n"
-
- "0:\n"
- "mov r4, r2, lsr #6\n"
-
- "1:\n"
- "vldr d0, [r0, #0]\n"
- "subs r4, r4, #1\n"
- "vldr d1, [r0, #8]\n"
- "vstr d0, [r1, #0]\n"
- "vldr d0, [r0, #16]\n"
- "vstr d1, [r1, #8]\n"
- "vldr d1, [r0, #24]\n"
- "vstr d0, [r1, #16]\n"
- "vldr d0, [r0, #32]\n"
- "vstr d1, [r1, #24]\n"
- "vldr d1, [r0, #40]\n"
- "vstr d0, [r1, #32]\n"
- "vldr d0, [r0, #48]\n"
- "vstr d1, [r1, #40]\n"
- "vldr d1, [r0, #56]\n"
- "vstr d0, [r1, #48]\n"
- "add r0, r0, #64\n"
- "vstr d1, [r1, #56]\n"
- "add r1, r1, #64\n"
- "bgt 1b\n"
-
- "sub r0, r0, r2\n"
- "sub r1, r1, r2\n"
- "subs r3, r3, #1\n"
- "bgt 0b\n"
-
- "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
- :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
-#else
- void bench(size_t) {
-#endif
- }
-};
-
-class CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark {
-public:
- CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { }
- virtual ~CopyVldmiaVstmiaBenchmark() {}
-
- const char *getName() { return "vldmia/vstmia"; }
-
- bool usesNeon() { return true; }
-
-protected:
- // Copy using vldmia/vstmia instructions.
-#if defined(__ARM_NEON__)
- void bench(size_t num_loops) {
- asm volatile(
- "stmfd sp!, {r0,r1,r2,r3,r4}\n"
-
- "mov r0, %0\n"
- "mov r1, %1\n"
- "mov r2, %2\n"
- "mov r3, %3\n"
-
- "0:\n"
- "mov r4, r2, lsr #6\n"
-
- "1:\n"
- "vldmia r0!, {d0-d7}\n"
- "subs r4, r4, #1\n"
- "vstmia r1!, {d0-d7}\n"
- "bgt 1b\n"
-
- "sub r0, r0, r2\n"
- "sub r1, r1, r2\n"
- "subs r3, r3, #1\n"
- "bgt 0b\n"
-
- "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
- :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
-#else
- void bench(size_t) {
-#endif
- }
-};
-
-class MemcpyBenchmark : public CopyBandwidthBenchmark {
-public:
- MemcpyBenchmark() : CopyBandwidthBenchmark() { }
- virtual ~MemcpyBenchmark() {}
-
- const char *getName() { return "memcpy"; }
-
-protected:
- void bench(size_t num_loops) {
- for (size_t i = 0; i < num_loops; i++) {
- memcpy(_dst, _src, _size);
- }
- }
-};
-
-class SingleBufferBandwidthBenchmark : public BandwidthBenchmark {
-public:
- SingleBufferBandwidthBenchmark() : BandwidthBenchmark(), _buffer(NULL) { }
- virtual ~SingleBufferBandwidthBenchmark() {
- if (_buffer) {
- free(_buffer);
- _buffer = NULL;
- }
- }
-
- bool setSize(size_t size) {
- if (_buffer) {
- free(_buffer);
- _buffer = NULL;
- }
-
- if (size == 0) {
- _size = DEFAULT_SINGLE_BUFFER_SIZE;
- } else {
- _size = size;
- }
-
- _buffer = reinterpret_cast<char*>(memalign(64, _size));
- if (!_buffer) {
- perror("Failed to allocate memory for test.");
- return false;
- }
- memset(_buffer, 0, _size);
-
- return true;
- }
-
- bool verify() { return true; }
-
-protected:
- char *_buffer;
-
- static const unsigned int DEFAULT_SINGLE_BUFFER_SIZE = 16000;
-};
-
-class WriteBandwidthBenchmark : public SingleBufferBandwidthBenchmark {
-public:
- WriteBandwidthBenchmark() : SingleBufferBandwidthBenchmark() { }
- virtual ~WriteBandwidthBenchmark() { }
-
- bool verify() {
- memset(_buffer, 0, _size);
- bench(1);
- for (size_t i = 0; i < _size; i++) {
- if (_buffer[i] != 1) {
- printf("Buffer failed to compare after one loop.\n");
- return false;
- }
- }
-
- memset(_buffer, 0, _size);
- bench(2);
- for (size_t i = 0; i < _size; i++) {
- if (_buffer[i] != 2) {
- printf("Buffer failed to compare after two loops.\n");
- return false;
- }
- }
-
- return true;
- }
-};
-
-class WriteStrdBenchmark : public WriteBandwidthBenchmark {
-public:
- WriteStrdBenchmark() : WriteBandwidthBenchmark() { }
- virtual ~WriteStrdBenchmark() {}
-
- const char *getName() { return "strd"; }
-
-protected:
- // Write a given value using strd.
- void bench(size_t num_loops) {
- asm volatile(
- "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
-
- "mov r0, %0\n"
- "mov r1, %1\n"
- "mov r2, %2\n"
-
- "mov r4, #0\n"
- "mov r5, #0\n"
-
- "0:\n"
- "mov r3, r1, lsr #5\n"
-
- "add r4, r4, #0x01010101\n"
- "mov r5, r4\n"
-
- "1:\n"
- "subs r3, r3, #1\n"
- "strd r4, r5, [r0]\n"
- "strd r4, r5, [r0, #8]\n"
- "strd r4, r5, [r0, #16]\n"
- "strd r4, r5, [r0, #24]\n"
- "add r0, r0, #32\n"
- "bgt 1b\n"
-
- "sub r0, r0, r1\n"
- "subs r2, r2, #1\n"
- "bgt 0b\n"
-
- "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
- :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
- }
-};
-
-class WriteStmiaBenchmark : public WriteBandwidthBenchmark {
-public:
- WriteStmiaBenchmark() : WriteBandwidthBenchmark() { }
- virtual ~WriteStmiaBenchmark() {}
-
- const char *getName() { return "stmia"; }
-
-protected:
- // Write a given value using stmia.
- void bench(size_t num_loops) {
- asm volatile(
- "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
-
- "mov r0, %0\n"
- "mov r1, %1\n"
- "mov r2, %2\n"
-
- "mov r4, #0\n"
-
- "0:\n"
- "mov r3, r1, lsr #5\n"
-
- "add r4, r4, #0x01010101\n"
- "mov r5, r4\n"
- "mov r6, r4\n"
- "mov r7, r4\n"
- "mov r8, r4\n"
- "mov r9, r4\n"
- "mov r10, r4\n"
- "mov r11, r4\n"
-
- "1:\n"
- "subs r3, r3, #1\n"
- "stmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
- "bgt 1b\n"
-
- "sub r0, r0, r1\n"
- "subs r2, r2, #1\n"
- "bgt 0b\n"
-
- "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
- :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
- }
-};
-
-class WriteVst1Benchmark : public WriteBandwidthBenchmark {
-public:
- WriteVst1Benchmark() : WriteBandwidthBenchmark() { }
- virtual ~WriteVst1Benchmark() {}
-
- const char *getName() { return "vst1"; }
-
- bool usesNeon() { return true; }
-
-protected:
- // Write a given value using vst.
-#if defined(__ARM_NEON__)
- void bench(size_t num_loops) {
- asm volatile(
- "stmfd sp!, {r0,r1,r2,r3,r4}\n"
-
- "mov r0, %0\n"
- "mov r1, %1\n"
- "mov r2, %2\n"
- "mov r4, #0\n"
-
- "0:\n"
- "mov r3, r1, lsr #5\n"
-
- "add r4, r4, #1\n"
- "vdup.8 d0, r4\n"
- "vmov d1, d0\n"
- "vmov d2, d0\n"
- "vmov d3, d0\n"
-
- "1:\n"
- "subs r3, r3, #1\n"
- "vst1.8 {d0-d3}, [r0:128]!\n"
- "bgt 1b\n"
-
- "sub r0, r0, r1\n"
- "subs r2, r2, #1\n"
- "bgt 0b\n"
-
- "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
- :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
-#else
- void bench(size_t) {
-#endif
- }
-};
-
-class WriteVstrBenchmark : public WriteBandwidthBenchmark {
-public:
- WriteVstrBenchmark() : WriteBandwidthBenchmark() { }
- virtual ~WriteVstrBenchmark() {}
-
- const char *getName() { return "vstr"; }
-
- bool usesNeon() { return true; }
-
-protected:
- // Write a given value using vst.
-#if defined(__ARM_NEON__)
- void bench(size_t num_loops) {
- asm volatile(
- "stmfd sp!, {r0,r1,r2,r3,r4}\n"
-
- "mov r0, %0\n"
- "mov r1, %1\n"
- "mov r2, %2\n"
- "mov r4, #0\n"
-
- "0:\n"
- "mov r3, r1, lsr #5\n"
-
- "add r4, r4, #1\n"
- "vdup.8 d0, r4\n"
- "vmov d1, d0\n"
- "vmov d2, d0\n"
- "vmov d3, d0\n"
-
- "1:\n"
- "vstr d0, [r0, #0]\n"
- "subs r3, r3, #1\n"
- "vstr d1, [r0, #8]\n"
- "vstr d0, [r0, #16]\n"
- "vstr d1, [r0, #24]\n"
- "add r0, r0, #32\n"
- "bgt 1b\n"
-
- "sub r0, r0, r1\n"
- "subs r2, r2, #1\n"
- "bgt 0b\n"
-
- "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
- :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
-#else
- void bench(size_t) {
-#endif
- }
-};
-
-class WriteVstmiaBenchmark : public WriteBandwidthBenchmark {
-public:
- WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { }
- virtual ~WriteVstmiaBenchmark() {}
-
- const char *getName() { return "vstmia"; }
-
- bool usesNeon() { return true; }
-
-protected:
- // Write a given value using vstmia.
-#if defined(__ARM_NEON__)
- void bench(size_t num_loops) {
- asm volatile(
- "stmfd sp!, {r0,r1,r2,r3,r4}\n"
-
- "mov r0, %0\n"
- "mov r1, %1\n"
- "mov r2, %2\n"
- "mov r4, #0\n"
-
- "0:\n"
- "mov r3, r1, lsr #5\n"
-
- "add r4, r4, #1\n"
- "vdup.8 d0, r4\n"
- "vmov d1, d0\n"
- "vmov d2, d0\n"
- "vmov d3, d0\n"
-
- "1:\n"
- "subs r3, r3, #1\n"
- "vstmia r0!, {d0-d3}\n"
- "bgt 1b\n"
-
- "sub r0, r0, r1\n"
- "subs r2, r2, #1\n"
- "bgt 0b\n"
-
- "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
- :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
-#else
- void bench(size_t) {
-#endif
- }
-};
-
-class MemsetBenchmark : public WriteBandwidthBenchmark {
-public:
- MemsetBenchmark() : WriteBandwidthBenchmark() { }
- virtual ~MemsetBenchmark() {}
-
- const char *getName() { return "memset"; }
-
-protected:
- void bench(size_t num_loops) {
- for (size_t i = 0; i < num_loops; i++) {
- memset(_buffer, (i % 255) + 1, _size);
- }
- }
-};
-
-class ReadLdrdBenchmark : public SingleBufferBandwidthBenchmark {
-public:
- ReadLdrdBenchmark() : SingleBufferBandwidthBenchmark() { }
- virtual ~ReadLdrdBenchmark() {}
-
- const char *getName() { return "ldrd"; }
-
-protected:
- // Write a given value using strd.
- void bench(size_t num_loops) {
- asm volatile(
- "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
-
- "mov r0, %0\n"
- "mov r1, %1\n"
- "mov r2, %2\n"
-
- "0:\n"
- "mov r3, r1, lsr #5\n"
-
- "1:\n"
- "subs r3, r3, #1\n"
- "ldrd r4, r5, [r0]\n"
- "ldrd r4, r5, [r0, #8]\n"
- "ldrd r4, r5, [r0, #16]\n"
- "ldrd r4, r5, [r0, #24]\n"
- "add r0, r0, #32\n"
- "bgt 1b\n"
-
- "sub r0, r0, r1\n"
- "subs r2, r2, #1\n"
- "bgt 0b\n"
-
- "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
- :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
- }
-};
-
-class ReadLdmiaBenchmark : public SingleBufferBandwidthBenchmark {
-public:
- ReadLdmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
- virtual ~ReadLdmiaBenchmark() {}
-
- const char *getName() { return "ldmia"; }
-
-protected:
- // Write a given value using stmia.
- void bench(size_t num_loops) {
- asm volatile(
- "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
-
- "mov r0, %0\n"
- "mov r1, %1\n"
- "mov r2, %2\n"
-
- "0:\n"
- "mov r3, r1, lsr #5\n"
-
- "1:\n"
- "subs r3, r3, #1\n"
- "ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
- "bgt 1b\n"
-
- "sub r0, r0, r1\n"
- "subs r2, r2, #1\n"
- "bgt 0b\n"
-
- "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
- :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
- }
-};
-
-class ReadVld1Benchmark : public SingleBufferBandwidthBenchmark {
-public:
- ReadVld1Benchmark() : SingleBufferBandwidthBenchmark() { }
- virtual ~ReadVld1Benchmark() {}
-
- const char *getName() { return "vld1"; }
-
- bool usesNeon() { return true; }
-
-protected:
- // Write a given value using vst.
-#if defined(__ARM_NEON__)
- void bench(size_t num_loops) {
- asm volatile(
- "stmfd sp!, {r0,r1,r2,r3}\n"
-
- "mov r0, %0\n"
- "mov r1, %1\n"
- "mov r2, %2\n"
-
- "0:\n"
- "mov r3, r1, lsr #5\n"
-
- "1:\n"
- "subs r3, r3, #1\n"
- "vld1.8 {d0-d3}, [r0:128]!\n"
- "bgt 1b\n"
-
- "sub r0, r0, r1\n"
- "subs r2, r2, #1\n"
- "bgt 0b\n"
-
- "ldmfd sp!, {r0,r1,r2,r3}\n"
- :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
-#else
- void bench(size_t) {
-#endif
- }
-};
-
-class ReadVldrBenchmark : public SingleBufferBandwidthBenchmark {
-public:
- ReadVldrBenchmark() : SingleBufferBandwidthBenchmark() { }
- virtual ~ReadVldrBenchmark() {}
-
- const char *getName() { return "vldr"; }
-
- bool usesNeon() { return true; }
-
-protected:
- // Write a given value using vst.
-#if defined(__ARM_NEON__)
- void bench(size_t num_loops) {
- asm volatile(
- "stmfd sp!, {r0,r1,r2,r3}\n"
-
- "mov r0, %0\n"
- "mov r1, %1\n"
- "mov r2, %2\n"
-
- "0:\n"
- "mov r3, r1, lsr #5\n"
-
- "1:\n"
- "vldr d0, [r0, #0]\n"
- "subs r3, r3, #1\n"
- "vldr d1, [r0, #8]\n"
- "vldr d0, [r0, #16]\n"
- "vldr d1, [r0, #24]\n"
- "add r0, r0, #32\n"
- "bgt 1b\n"
-
- "sub r0, r0, r1\n"
- "subs r2, r2, #1\n"
- "bgt 0b\n"
-
- "ldmfd sp!, {r0,r1,r2,r3}\n"
- :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
-#else
- void bench(size_t) {
-#endif
- }
-};
-
-
-class ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark {
-public:
- ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
- virtual ~ReadVldmiaBenchmark() {}
-
- const char *getName() { return "vldmia"; }
-
- bool usesNeon() { return true; }
-
-protected:
- // Write a given value using vstmia.
-#if defined(__ARM_NEON__)
- void bench(size_t num_loops) {
- asm volatile(
- "stmfd sp!, {r0,r1,r2,r3}\n"
-
- "mov r0, %0\n"
- "mov r1, %1\n"
- "mov r2, %2\n"
-
- "0:\n"
- "mov r3, r1, lsr #5\n"
-
- "1:\n"
- "subs r3, r3, #1\n"
- "vldmia r0!, {d0-d3}\n"
- "bgt 1b\n"
-
- "sub r0, r0, r1\n"
- "subs r2, r2, #1\n"
- "bgt 0b\n"
-
- "ldmfd sp!, {r0,r1,r2,r3}\n"
- :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
-#else
- void bench(size_t) {
-#endif
- }
-};
-
-#endif // __BANDWIDTH_H__
+++ /dev/null
-/*
- * Copyright (C) 2007 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include <time.h>
-#include <unistd.h>
-#include <sched.h>
-#include <sys/resource.h>
-#include <sys/syscall.h>
-#include <sys/types.h>
-#include <sys/mman.h>
-
-#ifdef __ARM_NEON__
-#include <arm_neon.h>
-#endif
-
-
-typedef long long nsecs_t;
-static nsecs_t gTime;
-float data_f[1024 * 128];
-
-static nsecs_t system_time()
-{
- struct timespec t;
- t.tv_sec = t.tv_nsec = 0;
- clock_gettime(CLOCK_MONOTONIC, &t);
- return nsecs_t(t.tv_sec)*1000000000LL + t.tv_nsec;
-}
-
-static void startTime()
-{
- gTime = system_time();
-}
-
-static void endTime(const char *str, double ops)
-{
- nsecs_t t = system_time() - gTime;
- double ds = ((double)t) / 1e9;
- printf("Test: %s, %f Mops\n", str, ops / ds / 1e6);
-}
-
-
-static void test_mad() {
- for(int i=0; i<1020; i++) {
- data_f[i] = i;
- }
-
- startTime();
-
- // Do ~1 billion ops
- for (int ct=0; ct < (1000 * (1000 / 20)); ct++) {
- for (int i=0; i < 1000; i++) {
- data_f[i] = (data_f[i] * 0.02f +
- data_f[i+1] * 0.04f +
- data_f[i+2] * 0.05f +
- data_f[i+3] * 0.1f +
- data_f[i+4] * 0.2f +
- data_f[i+5] * 0.2f +
- data_f[i+6] * 0.1f +
- data_f[i+7] * 0.05f +
- data_f[i+8] * 0.04f +
- data_f[i+9] * 0.02f + 1.f);
- }
- }
-
- endTime("scalar mad", 1e9);
-}
-
-
-#ifdef __ARM_NEON__
-
-static void test_fma() {
- for(int i=0; i<1020 * 4; i++) {
- data_f[i] = i;
- }
- float32x4_t c0_02 = vdupq_n_f32(0.02f);
- float32x4_t c0_04 = vdupq_n_f32(0.04f);
- float32x4_t c0_05 = vdupq_n_f32(0.05f);
- float32x4_t c0_10 = vdupq_n_f32(0.1f);
- float32x4_t c0_20 = vdupq_n_f32(0.2f);
- float32x4_t c1_00 = vdupq_n_f32(1.0f);
-
- startTime();
-
- // Do ~1 billion ops
- for (int ct=0; ct < (1000 * (1000 / 80)); ct++) {
- for (int i=0; i < 1000; i++) {
- float32x4_t t;
- t = vmulq_f32(vld1q_f32((float32_t *)&data_f[i]), c0_02);
- t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+4]), c0_04);
- t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+8]), c0_05);
- t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+12]), c0_10);
- t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+16]), c0_20);
- t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+20]), c0_20);
- t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+24]), c0_10);
- t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+28]), c0_05);
- t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+32]), c0_04);
- t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+36]), c0_02);
- t = vaddq_f32(t, c1_00);
- vst1q_f32((float32_t *)&data_f[i], t);
- }
- }
-
- endTime("neon fma", 1e9);
-}
-#endif
-
-int fp_test(int, char**) {
- test_mad();
-
-#ifdef __ARM_NEON__
- test_fma();
-#endif
-
- return 0;
-}
+++ /dev/null
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <sys/time.h>
-#include <time.h>
-#include <unistd.h>
-#include <sys/resource.h>
-#include <sys/syscall.h>
-#include <sys/types.h>
-#include <sys/mman.h>
-
-#include "memtest.h"
-
-nsecs_t system_time() {
- struct timespec t;
- t.tv_sec = t.tv_nsec = 0;
- clock_gettime(CLOCK_MONOTONIC, &t);
- return nsecs_t(t.tv_sec)*1000000000LL + t.tv_nsec;
-}
-
-static void usage(char* p) {
- printf("Usage: %s <test> <options>\n"
- "<test> is one of the following:\n"
- " copy_bandwidth [--size BYTES_TO_COPY]\n"
- " write_bandwidth [--size BYTES_TO_WRITE]\n"
- " read_bandwidth [--size BYTES_TO_COPY]\n"
- " per_core_bandwidth [--size BYTES]\n"
- " --type copy_ldrd_strd | copy_ldmia_stmia | copy_vld1_vst1 |\n"
- " copy_vldr_vstr | copy_vldmia_vstmia | memcpy | write_strd |\n"
- " write_stmia | write_vst1 | write_vstr | write_vstmia | memset |\n"
- " read_ldrd | read_ldmia | read_vld1 | read_vldr | read_vldmia\n"
- " multithread_bandwidth [--size BYTES]\n"
- " --type copy_ldrd_strd | copy_ldmia_stmia | copy_vld1_vst1 |\n"
- " copy_vldr_vstr | copy_vldmia_vstmia | memcpy | write_strd |\n"
- " write_stmia | write_vst1 | write_vstr | write_vstmia | memset |\n"
- " read_ldrd | read_ldmia | read_vld1 | read_vldr | read_vldmia\n"
- " --num_threads NUM_THREADS_TO_RUN\n"
- " malloc [fill]\n"
- " madvise\n"
- " resampler\n"
- " stack (stack smasher)\n"
- " crawl\n"
- , p);
-}
-
-int copy_bandwidth(int argc, char** argv);
-int write_bandwidth(int argc, char** argv);
-int read_bandwidth(int argc, char** argv);
-int per_core_bandwidth(int argc, char** argv);
-int multithread_bandwidth(int argc, char** argv);
-int malloc_test(int argc, char** argv);
-int madvise_test(int argc, char** argv);
-int stack_smasher_test(int argc, char** argv);
-int crawl_test(int argc, char** argv);
-int fp_test(int argc, char** argv);
-
-typedef struct {
- const char *cmd_name;
- int (*func)(int argc, char** argv);
-} function_t;
-
-function_t function_table[] = {
- { "malloc", malloc_test },
- { "madvise", madvise_test },
- { "stack", stack_smasher_test },
- { "crawl", crawl_test },
- { "fp", fp_test },
- { "copy_bandwidth", copy_bandwidth },
- { "write_bandwidth", write_bandwidth },
- { "read_bandwidth", read_bandwidth },
- { "per_core_bandwidth", per_core_bandwidth },
- { "multithread_bandwidth", multithread_bandwidth },
-};
-
-int main(int argc, char** argv) {
- if (argc == 1) {
- usage(argv[0]);
- return 0;
- }
- int err = -1;
- for (unsigned int i = 0; i < sizeof(function_table)/sizeof(function_t); i++) {
- if (strcmp(argv[1], function_table[i].cmd_name) == 0) {
- err = (*function_table[i].func)(argc-1, argv+1);
- break;
- }
- }
- if (err) {
- usage(argv[0]);
- }
- return err;
-}
-
-int malloc_test(int argc, char** argv) {
- bool fill = (argc>=2 && !strcmp(argv[1], "fill"));
- size_t total = 0;
- size_t size = 0x40000000;
- while (size) {
- void* addr = malloc(size);
- if (addr == 0) {
- printf("size = %9zd failed\n", size);
- size >>= 1;
- } else {
- total += size;
- printf("size = %9zd, addr = %p (total = %9zd (%zd MB))\n",
- size, addr, total, total / (1024*1024));
- if (fill) {
- printf("filling...\n");
- fflush(stdout);
- memset(addr, 0, size);
- }
- size = size + (size>>1);
- }
- }
- printf("done. allocated %zd MB\n", total / (1024*1024));
- return 0;
-}
-
-int madvise_test(int, char**) {
- for (int i=0 ; i<2 ; i++) {
- size_t size = i==0 ? 4096 : 48*1024*1024; // 48 MB
- printf("Allocating %zd MB... ", size/(1024*1024)); fflush(stdout);
- void* addr1 = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
- printf("%p (%s)\n", addr1, addr1==(void*)-1 ? "failed" : "OK"); fflush(stdout);
-
- printf("touching %p...\n", addr1); fflush(stdout);
- memset(addr1, 0x55, size);
-
- printf("advising DONTNEED...\n"); fflush(stdout);
- madvise(addr1, size, MADV_DONTNEED);
-
- printf("reading back %p...\n", addr1); fflush(stdout);
- if (*(long*)addr1 == 0) {
- printf("madvise freed some pages\n");
- } else if (*(long*)addr1 == 0x55555555) {
- printf("pages are still there\n");
- } else {
- printf("getting garbage back\n");
- }
-
- printf("Allocating %zd MB... ", size/(1024*1024)); fflush(stdout);
- void* addr2 = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
- printf("%p (%s)\n", addr2, addr2==(void*)-1 ? "failed" : "OK"); fflush(stdout);
-
- printf("touching %p...\n", addr2); fflush(stdout);
- memset(addr2, 0xAA, size);
-
- printf("unmap %p ...\n", addr2); fflush(stdout);
- munmap(addr2, size);
-
- printf("touching %p...\n", addr1); fflush(stdout);
- memset(addr1, 0x55, size);
-
- printf("unmap %p ...\n", addr1); fflush(stdout);
- munmap(addr1, size);
- }
-
- printf("Done\n"); fflush(stdout);
- return 0;
-}
-
-int stack_smasher_test(int, char**) {
- int dummy = 0;
- printf("corrupting our stack...\n");
- *(volatile long long*)&dummy = 0;
- return 0;
-}
-
-// --------------------------------------------------------------------
-
-extern "C" void thumb_function_1(int*p);
-extern "C" void thumb_function_2(int*p);
-extern "C" void arm_function_3(int*p);
-extern "C" void arm_function_2(int*p);
-extern "C" void arm_function_1(int*p);
-
-void arm_function_3(int*) {
- int a = 0;
- thumb_function_2(&a);
-}
-
-void arm_function_2(int*) {
- int a = 0;
- thumb_function_1(&a);
-}
-
-void arm_function_1(int*) {
- int a = 0;
- arm_function_2(&a);
-}
-
-int crawl_test(int, char**) {
- int a = 0;
- arm_function_1(&a);
- return 0;
-}
+++ /dev/null
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __MEMTEST_H__
-#define __MEMTEST_H__
-
-typedef long long nsecs_t;
-
-// Function prototypes.
-nsecs_t system_time();
-
-#endif // __MEMTEST_H__
+++ /dev/null
-#include <stdio.h>
-#include <unwind.h>
-
-extern "C" void arm_function_3(int* p);
-extern "C" void thumb_function_1(int* p);
-extern "C" void thumb_function_2(int* p);
-
-extern "C" _Unwind_Reason_Code trace_function(_Unwind_Context* context, void *) {
- printf("0x%x\n", _Unwind_GetIP(context));
- fflush(stdout);
- return _URC_NO_REASON;
-}
-
-void thumb_function_1(int*) {
- int a = 0;
- arm_function_3(&a);
-}
-
-void thumb_function_2(int*) {
- printf("unwinding...\n");
- _Unwind_Backtrace(trace_function, (void*) "backtrace!");
-}