* is_heavy_loaded() will return false.
*/
ALOGE("Error in getting cpu status. Skipping this check.");
+ fclose(fp);
return;
}
/*
* Calculate cpu usage in the past interval.
* If tracing is on, increase the idle threshold by 1.00% so that we do not
- * turn on and off tracing frequently whe the cpu load is right close to
+ * turn on and off tracing frequently when the cpu load is right close to
* threshold.
*/
static bool is_heavy_load(void) {
for (i = 0; i < num_blocks; i++, block--)
bg->block_bitmap[block / 8] &= ~(1 << (block % 8));
bg->free_blocks += num_blocks;
+ for (i = bg->chunk_count; i > 0 ;) {
+ --i;
+ if (bg->chunks[i].len >= num_blocks && bg->chunks[i].block <= block) {
+ if (bg->chunks[i].block == block) {
+ bg->chunks[i].block += num_blocks;
+ bg->chunks[i].len -= num_blocks;
+ } else if (bg->chunks[i].block + bg->chunks[i].len - 1 == block + num_blocks) {
+ bg->chunks[i].len -= num_blocks;
+ }
+ break;
+ }
+ }
}
/* Reduces an existing allocation by len blocks by return the last blocks
#include <memory>
#include <cmath>
#include <string>
+#include <thread>
+
+#define CACHE_HIT_SIZE 1 << 17
using namespace std;
-const size_t size_start = 64;
-const size_t size_end = 16 * (1ull << 20);
-const size_t samples = 2048;
+size_t size_start = 64;
+size_t size_end = 16 * (1ull << 20);
+size_t samples = 2048;
size_t size_per_test = 64 * (1ull << 20);
size_t tot_sum = 0;
+size_t delay = 0;
+float speed = 0;
+bool dummy = false;
void __attribute__((noinline)) memcpy_noinline(void *dst, void *src, size_t size);
void __attribute__((noinline)) memset_noinline(void *dst, int value, size_t size);
SumBench,
};
+static void usage(char* p) {
+ printf("Usage: %s <test> <options>\n"
+ "<test> is one of the following:\n"
+ " --memcpy\n"
+ " --memset\n"
+ " --sum\n"
+ "<options> are optional and apply to all tests:\n"
+ " --dummy\n"
+ " Simulates cpu-only load of a test. Guaranteed to use L2\n"
+ " instead. Not supported on --sum test.\n"
+ " --delay DELAY_DIVISOR\n"
+ " --start START_SIZE_MB\n"
+ " --end END_SIZE_MB (requires start, optional)\n"
+ " --samples NUM_SAMPLES\n"
+ , p);
+}
+
int main(int argc, char *argv[])
{
- BenchType type;
+ BenchType type = MemcpyBench;
if (argc <= 1) {
- cerr << "memcpy_perf [--memcpy|--memset|--sum]" << endl;
+ usage(argv[0]);
return 0;
}
- if (string(argv[1]) == string("--memcpy")) {
- type = MemcpyBench;
- } else if (string(argv[1]) == string("--memset")) {
- type = MemsetBench;
- } else if (string(argv[1]) == string("--sum")) {
- type = SumBench;
- } else {
- type = MemcpyBench;
+ for (int i = 1; i < argc; i++) {
+ if (string(argv[i]) == string("--memcpy")) {
+ type = MemcpyBench;
+ } else if (string(argv[i]) == string("--memset")) {
+ type = MemsetBench;
+ } else if (string(argv[i]) == string("--sum")) {
+ type = SumBench;
+ } else if (string(argv[i]) == string("--dummy")) {
+ dummy = true;
+ } else if (i + 1 < argc) {
+ if (string(argv[i]) == string("--delay")) {
+ delay = atoi(argv[++i]);
+ } else if (string(argv[i]) == string("--start")) {
+ size_start = atoi(argv[++i]) * (1ull << 20);
+ size_end = size_start;
+ } else if (string(argv[i]) == string("--end")) {
+ size_t end = atoi(argv[++i]) * (1ull << 20);
+ if (end > size_start && i > 3
+ && string(argv[i-3]) == string("--start")) {
+ size_end = end;
+ } else {
+ printf("Cannot specify --end without --start.\n");
+ return 0;
+ }
+ } else if (string(argv[i]) == string("--samples")) {
+ samples = atoi(argv[++i]);
+ } else {
+ printf("Unknown argument %s\n", argv[i]);
+ return 0;
+ }
+ } else {
+ printf("The %s option requires a single argument.\n", argv[i]);
+ return 0;
+ }
}
unique_ptr<uint8_t[]> src(new uint8_t[size_end]);
//cout << "src: " << (uintptr_t)src.get() << endl;
//cout << "dst: " << (uintptr_t)dst.get() << endl;
- for (double cur_pow = start_pow; cur_pow <= end_pow; cur_pow += pow_inc) {
- chrono::time_point<chrono::high_resolution_clock> copy_start, copy_end;
+ for (double cur_pow = start_pow; cur_pow <= end_pow && samples > 0;
+ cur_pow += pow_inc) {
+ chrono::time_point<chrono::high_resolution_clock>
+ copy_start, copy_end, pre_wait;
size_t cur_size = (size_t)pow(10.0, cur_pow);
size_t iter_per_size = size_per_test / cur_size;
case MemsetBench: {
memcpy_noinline(src.get(), dst.get(), cur_size);
memset_noinline(dst.get(), 0xdeadbeef, cur_size);
+ size_t hit_size = CACHE_HIT_SIZE;
copy_start = chrono::high_resolution_clock::now();
for (int i = 0; i < iter_per_size; i++) {
- memset_noinline(dst.get(), 0xdeadbeef, cur_size);
+ if (!dummy) {
+ memset_noinline(dst.get(), 0xdeadbeef, cur_size);
+ } else {
+ while (hit_size < cur_size) {
+ memset_noinline
+ (dst.get(), 0xdeadbeef, CACHE_HIT_SIZE);
+ hit_size += 1 << 17;
+ }
+ }
+ if (delay != 0)
+ this_thread::sleep_for(chrono
+ ::nanoseconds(size_per_test / delay));
}
copy_end = chrono::high_resolution_clock::now();
break;
case MemcpyBench: {
memcpy_noinline(dst.get(), src.get(), cur_size);
memcpy_noinline(src.get(), dst.get(), cur_size);
+ size_t hit_size = CACHE_HIT_SIZE;
copy_start = chrono::high_resolution_clock::now();
for (int i = 0; i < iter_per_size; i++) {
- memcpy_noinline(dst.get(), src.get(), cur_size);
+ if (!dummy) {
+ memcpy_noinline(dst.get(), src.get(), cur_size);
+ } else {
+ while (hit_size < cur_size) {
+ memcpy_noinline
+ (dst.get(), src.get(), CACHE_HIT_SIZE);
+ hit_size += CACHE_HIT_SIZE;
+ }
+ }
+ if (delay != 0)
+ this_thread::sleep_for(chrono
+ ::nanoseconds(size_per_test / delay));
}
copy_end = chrono::high_resolution_clock::now();
break;
copy_start = chrono::high_resolution_clock::now();
for (int i = 0; i < iter_per_size; i++) {
s += sum(src.get(), cur_size);
+ if (delay != 0)
+ this_thread::sleep_for(chrono
+ ::nanoseconds(size_per_test / delay));
}
copy_end = chrono::high_resolution_clock::now();
tot_sum += s;
}
}
+ samples--;
double ns_per_copy = chrono::duration_cast<chrono::nanoseconds>(copy_end - copy_start).count() / double(iter_per_size);
double gb_per_sec = ((double)cur_size / (1ull<<30)) / (ns_per_copy / 1.0E9);
if (type == MemcpyBench)
gb_per_sec *= 2.0;
- cout << "size: " << cur_size << ", perf: " << gb_per_sec << "GB/s, iter: " << iter_per_size << endl;
+ double percent_waiting = 0;
+ if (delay != 0) {
+ percent_waiting = (size_per_test / delay) / ns_per_copy * 100;
+ }
+ cout << "size: " << cur_size << ", perf: " << gb_per_sec
+ << "GB/s, iter: " << iter_per_size << ", \% time spent waiting: "
+ << percent_waiting << endl;
}
return 0;
}
#include <ctype.h>
#include <stddef.h>
+#include <mntent.h>
#include <sys/mman.h>
#include <sys/stat.h>
}
static int store_num_cached(const char* fpath, const struct stat *sb) {
- int fd;
+ int fd, ret = -1;
fd = open (fpath, O_RDONLY);
if (fd == -1) {
- printf("Could not open file.");
- return -1;
+ fprintf(stderr, "Could not open file: %s\n", fpath);
+ return ret;
}
void* mapped_addr = mmap(NULL, sb->st_size, PROT_NONE, MAP_SHARED, fd, 0);
// Calculate bit-vector size
size_t num_file_pages = (sb->st_size + g_page_size - 1) / g_page_size;
unsigned char* mincore_data = calloc(1, num_file_pages);
- int ret = mincore(mapped_addr, sb->st_size, mincore_data);
- int num_cached = 0;
- unsigned int page = 0;
- for (page = 0; page < num_file_pages; page++) {
- if (mincore_data[page]) num_cached++;
- }
- if (num_cached > 0) {
- struct file_info *info = get_file_info(fpath, sb->st_size);
- info->num_cached_pages += num_cached;
- g_total_cached += num_cached;
+ ret = mincore(mapped_addr, sb->st_size, mincore_data);
+ if (!ret) {
+ int num_cached = 0;
+ unsigned int page = 0;
+ for (page = 0; page < num_file_pages; page++) {
+ if (mincore_data[page]) num_cached++;
+ }
+ if (num_cached > 0) {
+ struct file_info *info = get_file_info(fpath, sb->st_size);
+ info->num_cached_pages += num_cached;
+ g_total_cached += num_cached;
+ }
}
munmap(mapped_addr, sb->st_size);
}
close(fd);
- return 0;
+ return ret;
}
-static int scan_entry(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
+static int scan_entry(const char *fpath, const struct stat *sb, int typeflag,
+ struct FTW * __attribute__((unused))ftwbuf) {
if (typeflag == FTW_F) {
store_num_cached(fpath, sb);
}
g_files = malloc(INITIAL_NUM_FILES * sizeof(struct file_info*));
g_files_size = INITIAL_NUM_FILES;
- // Walk filesystem trees
- nftw("/system/", &scan_entry, MAX_NUM_FD, 0);
- nftw("/vendor/", &scan_entry, MAX_NUM_FD, 0);
- nftw("/data/", &scan_entry, MAX_NUM_FD, 0);
+ // Walk filesystem trees through procfs except rootfs/devfs/sysfs/procfs
+ FILE* fp = setmntent("/proc/mounts", "r");
+ if (fp == NULL) {
+ fprintf(stderr, "Error opening /proc/mounts\n");
+ return -errno;
+ }
+ struct mntent* mentry;
+ while ((mentry = getmntent(fp)) != NULL) {
+ if (strcmp(mentry->mnt_type, "rootfs") != 0 &&
+ strncmp("/dev", mentry->mnt_dir, strlen("/dev")) != 0 &&
+ strncmp("/sys", mentry->mnt_dir, strlen("/sys")) != 0 &&
+ strncmp("/proc", mentry->mnt_dir, strlen("/proc")) != 0) {
+ nftw(mentry->mnt_dir, &scan_entry, MAX_NUM_FD, FTW_MOUNT | FTW_PHYS | FTW_DEPTH);
+ }
+ }
+ endmntent(fp);
// Sort entries
qsort(g_files, g_num_files, sizeof(g_files[0]), &cmpfiles);
// Shared state between a test body and its cpu-hotplug toggle thread.
struct CpuToggleThreadArg {
    int toggle_cpu;                           // cpu id being onlined/offlined
    std::atomic<bool> end_flag{false};        // set by the test to stop the thread
    std::atomic<bool> cpu_hotplug_failed{false};  // set by the thread on failure

    CpuToggleThreadArg(int cpu) : toggle_cpu(cpu) {
    }
};
// Repeatedly onlines then offlines arg->toggle_cpu, sleeping
// cpu_hotplug_interval after each transition, until end_flag is set.
// On the first SetCpuOnline() failure it records cpu_hotplug_failed and
// exits so the main test loop can terminate early.
static void CpuToggleThread(CpuToggleThreadArg* arg) {
    while (!arg->end_flag) {
        // One full cycle per end_flag check: online, then offline.
        for (bool online : {true, false}) {
            if (!SetCpuOnline(arg->toggle_cpu, online)) {
                arg->cpu_hotplug_failed = true;
                return;
            }
            std::this_thread::sleep_for(cpu_hotplug_interval);
        }
    }
}
if (!FindAHotpluggableCpu(&test_cpu)) {
return;
}
- CpuToggleThreadArg cpu_toggle_arg;
- cpu_toggle_arg.toggle_cpu = test_cpu;
- cpu_toggle_arg.end_flag = false;
+ CpuToggleThreadArg cpu_toggle_arg(test_cpu);
std::thread cpu_toggle_thread(CpuToggleThread, &cpu_toggle_arg);
std::unique_ptr<EventTypeAndModifier> event_type_modifier = ParseEventType("cpu-cycles");
auto report_step = std::chrono::seconds(15);
size_t iterations = 0;
- while (cur_time < end_time) {
+ while (cur_time < end_time && !cpu_toggle_arg.cpu_hotplug_failed) {
if (cur_time + report_step < std::chrono::steady_clock::now()) {
// Report test time.
auto diff = std::chrono::duration_cast<std::chrono::seconds>(
iterations++;
GTEST_LOG_(INFO) << "Test offline while recording for " << iterations << " times.";
}
+ if (cpu_toggle_arg.cpu_hotplug_failed) {
+ GTEST_LOG_(INFO) << "Test ends because of cpu hotplug failure.";
+ }
cpu_toggle_arg.end_flag = true;
cpu_toggle_thread.join();
}
if (!FindAHotpluggableCpu(&test_cpu)) {
return;
}
- CpuToggleThreadArg cpu_toggle_arg;
- cpu_toggle_arg.toggle_cpu = test_cpu;
- cpu_toggle_arg.end_flag = false;
+ CpuToggleThreadArg cpu_toggle_arg(test_cpu);
std::thread cpu_toggle_thread(CpuToggleThread, &cpu_toggle_arg);
std::unique_ptr<EventTypeAndModifier> event_type_modifier = ParseEventType("cpu-cycles");
auto report_step = std::chrono::seconds(15);
size_t iterations = 0;
- while (cur_time < end_time) {
+ while (cur_time < end_time && !cpu_toggle_arg.cpu_hotplug_failed) {
if (cur_time + report_step < std::chrono::steady_clock::now()) {
// Report test time.
auto diff = std::chrono::duration_cast<std::chrono::seconds>(
iterations++;
GTEST_LOG_(INFO) << "Test offline while ioctl(PERF_EVENT_IOC_ENABLE) for " << iterations << " times.";
}
+ if (cpu_toggle_arg.cpu_hotplug_failed) {
+ GTEST_LOG_(INFO) << "Test ends because of cpu hotplug failure.";
+ }
cpu_toggle_arg.end_flag = true;
cpu_toggle_thread.join();
}
if (!FindAHotpluggableCpu(&test_cpu)) {
return;
}
- CpuToggleThreadArg cpu_toggle_arg;
- cpu_toggle_arg.toggle_cpu = test_cpu;
- cpu_toggle_arg.end_flag = false;
+ CpuToggleThreadArg cpu_toggle_arg(test_cpu);
std::thread cpu_toggle_thread(CpuToggleThread, &cpu_toggle_arg);
// Start cpu spinner.
auto report_step = std::chrono::seconds(15);
size_t iterations = 0;
- while (cur_time < end_time) {
+ while (cur_time < end_time && !cpu_toggle_arg.cpu_hotplug_failed) {
if (cur_time + report_step < std::chrono::steady_clock::now()) {
auto diff = std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::steady_clock::now() - start_time);
iterations++;
GTEST_LOG_(INFO) << "Test offline while user process profiling for " << iterations << " times.";
}
+ if (cpu_toggle_arg.cpu_hotplug_failed) {
+ GTEST_LOG_(INFO) << "Test ends because of cpu hotplug failure.";
+ }
cpu_toggle_arg.end_flag = true;
cpu_toggle_thread.join();
cpu_spin_arg.end_flag = true;
cpu_spin_thread.join();
// Check if the cpu-cycle event is still available on test_cpu.
- ASSERT_TRUE(SetCpuOnline(test_cpu, true));
- ASSERT_TRUE(EventFd::OpenEventFile(attr, -1, test_cpu, nullptr, true) != nullptr);
+ if (SetCpuOnline(test_cpu, true)) {
+ ASSERT_TRUE(EventFd::OpenEventFile(attr, -1, test_cpu, nullptr, true) != nullptr);
+ }
}
// http://b/19863147.
const size_t TEST_ITERATION_COUNT = 10u;
for (size_t i = 0; i < TEST_ITERATION_COUNT; ++i) {
int record_cpu = 0;
- ASSERT_TRUE(SetCpuOnline(test_cpu, true));
+ if (!SetCpuOnline(test_cpu, true)) {
+ break;
+ }
std::unique_ptr<EventFd> event_fd = EventFd::OpenEventFile(attr, getpid(), record_cpu, nullptr);
ASSERT_TRUE(event_fd != nullptr);
- ASSERT_TRUE(SetCpuOnline(test_cpu, false));
+ if (!SetCpuOnline(test_cpu, false)) {
+ break;
+ }
event_fd = nullptr;
event_fd = EventFd::OpenEventFile(attr, getpid(), record_cpu, nullptr);
ASSERT_TRUE(event_fd != nullptr);
function usage() {
# Print this squashfs image builder's usage synopsis to stdout.
cat<<EOT
Usage:
-${0##*/} SRC_DIR OUTPUT_FILE [-s] [-m MOUNT_POINT] [-d PRODUCT_OUT] [-C FS_CONFIG ] [-c FILE_CONTEXTS] [-B BLOCK_MAP_FILE] [-b BLOCK_SIZE] [-z COMPRESSOR] [-zo COMPRESSOR_OPT] [-t COMPRESS_THRESHOLD] [-a]
+${0##*/} SRC_DIR OUTPUT_FILE [-s] [-m MOUNT_POINT] [-d PRODUCT_OUT] [-C FS_CONFIG ] [-c FILE_CONTEXTS] [-B BLOCK_MAP_FILE] [-b BLOCK_SIZE] [-z COMPRESSOR] [-zo COMPRESSOR_OPT] [-t COMPRESS_THRESHOLD] [-w WHITELIST_FILE] [-a]
EOT
}
shift; shift
fi
+WHITELIST_FILE=
+if [[ "$1" == "-w" ]]; then
+ WHITELIST_FILE=$2
+ shift; shift
+fi
DISABLE_4K_ALIGN=false
if [[ "$1" == "-a" ]]; then
if [ "$DISABLE_4K_ALIGN" = true ]; then
OPT="$OPT -disable-4k-align"
fi
+if [ -n "$WHITELIST_FILE" ]; then
+ OPT="$OPT -whitelist $WHITELIST_FILE"
+fi
MAKE_SQUASHFS_CMD="mksquashfs $SRC_DIR/ $OUTPUT_FILE -no-progress -comp $COMPRESSOR $COMPRESSOR_OPT -no-exports -noappend -no-recovery -no-fragments -no-duplicates -android-fs-config $OPT"
echo $MAKE_SQUASHFS_CMD
LOCAL_PATH:= $(call my-dir)
include $(CLEAR_VARS)
-LOCAL_SRC_FILES:= icache_main.c icache.S icache2.S
+LOCAL_SRC_FILES:= icache_main.cpp Profiler.cpp icache.S
LOCAL_SHARED_LIBRARIES := libc
LOCAL_MODULE_TARGET_ARCH := arm
+LOCAL_CFLAGS += -Wall -Werror
+
include $(BUILD_EXECUTABLE)
--- /dev/null
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Profiler.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <iostream>
+
+#if defined(__linux__)
+
+#include <sys/syscall.h>
+
+#ifdef __ARM_ARCH
+ enum ARMv8PmuPerfTypes{
+ // Common micro-architecture events
+ ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL = 0x01,
+ ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS = 0x14,
+ ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS = 0x16,
+ ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL = 0x17,
+ ARMV8_PMUV3_PERFCTR_L2_CACHE_WB = 0x18,
+ };
+#endif
+
// Thin wrapper around the perf_event_open system call: glibc exports no
// symbol for it, so it must be invoked through syscall(2).
static int perf_event_open(struct perf_event_attr* hw_event, pid_t pid,
        int cpu, int group_fd, unsigned long flags) {
    const long rc = syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
    return static_cast<int>(rc);
}
+
+#endif // __linux__
+
+namespace utils {
+
+Profiler& Profiler::get() noexcept {
+ static Profiler sProfiler;
+ return sProfiler;
+}
+
+Profiler::Profiler() noexcept {
+ std::uninitialized_fill(mCountersFd.begin(), mCountersFd.end(), -1);
+ Profiler::resetEvents(EV_CPU_CYCLES | EV_L1D_RATES | EV_BPU_RATES);
+}
+
+Profiler::~Profiler() noexcept {
+ for (int fd : mCountersFd) {
+ if (fd >= 0) {
+ close(fd);
+ }
+ }
+}
+
+uint32_t Profiler::resetEvents(uint32_t eventMask) noexcept {
+ // close all counters
+ for (int& fd : mCountersFd) {
+ if (fd >= 0) {
+ close(fd);
+ fd = -1;
+ }
+ }
+ mEnabledEvents = 0;
+
+#if defined(__linux__)
+
+ struct perf_event_attr pe;
+ memset(&pe, 0, sizeof(struct perf_event_attr));
+ pe.type = PERF_TYPE_HARDWARE;
+ pe.size = sizeof(struct perf_event_attr);
+ pe.config = PERF_COUNT_HW_INSTRUCTIONS;
+ pe.disabled = 1;
+ pe.exclude_kernel = 1;
+ pe.exclude_hv = 1;
+ pe.read_format = PERF_FORMAT_GROUP |
+ PERF_FORMAT_ID |
+ PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_TOTAL_TIME_RUNNING;
+
+ uint8_t count = 0;
+ int fd = perf_event_open(&pe, 0, -1, -1, 0);
+ if (fd >= 0) {
+ const int groupFd = fd;
+ mIds[INSTRUCTIONS] = count++;
+ mCountersFd[INSTRUCTIONS] = fd;
+
+ pe.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
+
+ if (eventMask & EV_CPU_CYCLES) {
+ pe.type = PERF_TYPE_HARDWARE;
+ pe.config = PERF_COUNT_HW_CPU_CYCLES;
+ mCountersFd[CPU_CYCLES] = perf_event_open(&pe, 0, -1, groupFd, 0);
+ if (mCountersFd[CPU_CYCLES] > 0) {
+ mIds[CPU_CYCLES] = count++;
+ mEnabledEvents |= EV_CPU_CYCLES;
+ }
+ }
+
+ if (eventMask & EV_L1D_REFS) {
+ pe.type = PERF_TYPE_HARDWARE;
+ pe.config = PERF_COUNT_HW_CACHE_REFERENCES;
+ mCountersFd[DCACHE_REFS] = perf_event_open(&pe, 0, -1, groupFd, 0);
+ if (mCountersFd[DCACHE_REFS] > 0) {
+ mIds[DCACHE_REFS] = count++;
+ mEnabledEvents |= EV_L1D_REFS;
+ }
+ }
+
+ if (eventMask & EV_L1D_MISSES) {
+ pe.type = PERF_TYPE_HARDWARE;
+ pe.config = PERF_COUNT_HW_CACHE_MISSES;
+ mCountersFd[DCACHE_MISSES] = perf_event_open(&pe, 0, -1, groupFd, 0);
+ if (mCountersFd[DCACHE_MISSES] > 0) {
+ mIds[DCACHE_MISSES] = count++;
+ mEnabledEvents |= EV_L1D_MISSES;
+ }
+ }
+
+ if (eventMask & EV_BPU_REFS) {
+ pe.type = PERF_TYPE_HARDWARE;
+ pe.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+ mCountersFd[BRANCHES] = perf_event_open(&pe, 0, -1, groupFd, 0);
+ if (mCountersFd[BRANCHES] > 0) {
+ mIds[BRANCHES] = count++;
+ mEnabledEvents |= EV_BPU_REFS;
+ }
+ }
+
+ if (eventMask & EV_BPU_MISSES) {
+ pe.type = PERF_TYPE_HARDWARE;
+ pe.config = PERF_COUNT_HW_BRANCH_MISSES;
+ mCountersFd[BRANCH_MISSES] = perf_event_open(&pe, 0, -1, groupFd, 0);
+ if (mCountersFd[BRANCH_MISSES] > 0) {
+ mIds[BRANCH_MISSES] = count++;
+ mEnabledEvents |= EV_BPU_MISSES;
+ }
+ }
+
+#ifdef __ARM_ARCH
+ if (eventMask & EV_L1I_REFS) {
+ pe.type = PERF_TYPE_RAW;
+ pe.config = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS;
+ mCountersFd[ICACHE_REFS] = perf_event_open(&pe, 0, -1, groupFd, 0);
+ if (mCountersFd[ICACHE_REFS] > 0) {
+ mIds[ICACHE_REFS] = count++;
+ mEnabledEvents |= EV_L1I_REFS;
+ }
+ }
+
+ if (eventMask & EV_L1I_MISSES) {
+ pe.type = PERF_TYPE_RAW;
+ pe.config = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL;
+ mCountersFd[ICACHE_MISSES] = perf_event_open(&pe, 0, -1, groupFd, 0);
+ if (mCountersFd[ICACHE_MISSES] > 0) {
+ mIds[ICACHE_MISSES] = count++;
+ mEnabledEvents |= EV_L1I_MISSES;
+ }
+ }
+#else
+ if (eventMask & EV_L1I_REFS) {
+ pe.type = PERF_TYPE_HW_CACHE;
+ pe.config = PERF_COUNT_HW_CACHE_L1I |
+ (PERF_COUNT_HW_CACHE_OP_READ<<8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS<<16);
+ mCountersFd[ICACHE_REFS] = perf_event_open(&pe, 0, -1, groupFd, 0);
+ if (mCountersFd[ICACHE_REFS] > 0) {
+ mIds[ICACHE_REFS] = count++;
+ mEnabledEvents |= EV_L1I_REFS;
+ }
+ }
+
+ if (eventMask & EV_L1I_MISSES) {
+ pe.type = PERF_TYPE_HW_CACHE;
+ pe.config = PERF_COUNT_HW_CACHE_L1I |
+ (PERF_COUNT_HW_CACHE_OP_READ<<8) | (PERF_COUNT_HW_CACHE_RESULT_MISS<<16);
+ mCountersFd[ICACHE_MISSES] = perf_event_open(&pe, 0, -1, groupFd, 0);
+ if (mCountersFd[ICACHE_MISSES] > 0) {
+ mIds[ICACHE_MISSES] = count++;
+ mEnabledEvents |= EV_L1I_MISSES;
+ }
+ }
+#endif
+ }
+#endif // __linux__
+ return mEnabledEvents;
+}
+
+} // namespace utils
--- /dev/null
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TNT_UTILS_PROFILER_H
+#define TNT_UTILS_PROFILER_H
+
+#include <assert.h>
+#include <stdint.h>
+
+#include <array>
+#include <chrono>
+
+#if defined(__linux__)
+# include <unistd.h>
+# include <sys/ioctl.h>
+# include <linux/perf_event.h>
+#endif
+
+namespace utils {
+
+class Profiler {
+ enum {
+ INSTRUCTIONS = 0, // must be zero
+ CPU_CYCLES = 1,
+ DCACHE_REFS = 2,
+ DCACHE_MISSES = 3,
+ BRANCHES = 4,
+ BRANCH_MISSES = 5,
+ ICACHE_REFS = 6,
+ ICACHE_MISSES = 7,
+
+ // Must be last one
+ EVENT_COUNT
+ };
+
+public:
+
+ enum {
+ EV_CPU_CYCLES = 1 << CPU_CYCLES,
+ EV_L1D_REFS = 1 << DCACHE_REFS,
+ EV_L1D_MISSES = 1 << DCACHE_MISSES,
+ EV_BPU_REFS = 1 << BRANCHES,
+ EV_BPU_MISSES = 1 << BRANCH_MISSES,
+ EV_L1I_REFS = 1 << ICACHE_REFS,
+ EV_L1I_MISSES = 1 << ICACHE_MISSES,
+ // helpers
+ EV_L1D_RATES = EV_L1D_REFS | EV_L1D_MISSES,
+ EV_L1I_RATES = EV_L1I_REFS | EV_L1I_MISSES,
+ EV_BPU_RATES = EV_BPU_REFS | EV_BPU_MISSES,
+ };
+
+ static Profiler& get() noexcept;
+
+
+ Profiler(const Profiler& rhs) = delete;
+ Profiler(Profiler&& rhs) = delete;
+ Profiler& operator=(const Profiler& rhs) = delete;
+ Profiler& operator=(Profiler&& rhs) = delete;
+
+ // selects which events are enabled.
+ // By Default: EV_CPU_CYCLES | EV_L1D_RATES | EV_BPU_RATES
+ uint32_t resetEvents(uint32_t eventMask) noexcept;
+
+ uint32_t getEnabledEvents() const noexcept { return mEnabledEvents; }
+
+ // could return false if performance counters are not supported/enabled
+ bool isValid() const { return mCountersFd[0] >= 0; }
+
+ class Counters {
+ friend class Profiler;
+ uint64_t nr;
+ uint64_t time_enabled;
+ uint64_t time_running;
+ struct {
+ uint64_t value;
+ uint64_t id;
+ } counters[Profiler::EVENT_COUNT];
+
+ friend Counters operator-(Counters lhs, const Counters& rhs) noexcept {
+ lhs.nr -= rhs.nr;
+ lhs.time_enabled -= rhs.time_enabled;
+ lhs.time_running -= rhs.time_running;
+ for (size_t i=0 ; i<EVENT_COUNT ; ++i) {
+ lhs.counters[i].value -= rhs.counters[i].value;
+ }
+ return lhs;
+ }
+
+ public:
+ uint64_t getInstructions() const { return counters[INSTRUCTIONS].value; }
+ uint64_t getCpuCycles() const { return counters[CPU_CYCLES].value; }
+ uint64_t getL1DReferences() const { return counters[DCACHE_REFS].value; }
+ uint64_t getL1DMisses() const { return counters[DCACHE_MISSES].value; }
+ uint64_t getL1IReferences() const { return counters[ICACHE_REFS].value; }
+ uint64_t getL1IMisses() const { return counters[ICACHE_MISSES].value; }
+ uint64_t getBranchInstructions() const { return counters[BRANCHES].value; }
+ uint64_t getBranchMisses() const { return counters[BRANCH_MISSES].value; }
+
+ std::chrono::duration<uint64_t, std::nano> getWallTime() const {
+ return std::chrono::duration<uint64_t, std::nano>(time_enabled);
+ }
+
+ std::chrono::duration<uint64_t, std::nano> getRunningTime() const {
+ return std::chrono::duration<uint64_t, std::nano>(time_running);
+ }
+
+ double getIPC() const noexcept {
+ uint64_t cpuCycles = getCpuCycles();
+ uint64_t instructions = getInstructions();
+ return double(instructions) / double(cpuCycles);
+ }
+
+ double getCPI() const noexcept {
+ uint64_t cpuCycles = getCpuCycles();
+ uint64_t instructions = getInstructions();
+ return double(cpuCycles) / double(instructions);
+ }
+
+ double getL1DMissRate() const noexcept {
+ uint64_t cacheReferences = getL1DReferences();
+ uint64_t cacheMisses = getL1DMisses();
+ return double(cacheMisses) / double(cacheReferences);
+ }
+
+ double getL1DHitRate() const noexcept {
+ return 1.0 - getL1DMissRate();
+ }
+
+ double getL1IMissRate() const noexcept {
+ uint64_t cacheReferences = getL1IReferences();
+ uint64_t cacheMisses = getL1IMisses();
+ return double(cacheMisses) / double(cacheReferences);
+ }
+
+ double getL1IHitRate() const noexcept {
+ return 1.0 - getL1IMissRate();
+ }
+
+ double getBranchMissRate() const noexcept {
+ uint64_t branchReferences = getBranchInstructions();
+ uint64_t branchMisses = getBranchMisses();
+ return double(branchMisses) / double(branchReferences);
+ }
+
+ double getBranchHitRate() const noexcept {
+ return 1.0 - getBranchMissRate();
+ }
+
+ double getMPKI(uint64_t misses) const noexcept {
+ return (misses * 1000.0) / getInstructions();
+ }
+
+ };
+
+#if defined(__linux__)
+
+ void reset() noexcept {
+ int fd = mCountersFd[0];
+ ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
+ }
+
+ void start() noexcept {
+ int fd = mCountersFd[0];
+ ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
+ }
+
+ void stop() noexcept {
+ int fd = mCountersFd[0];
+ ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
+ }
+
+ void readCounters(Counters* outCounters) noexcept {
+ Counters counters;
+ ssize_t n = read(mCountersFd[0], &counters, sizeof(Counters));
+ memset(outCounters, 0, sizeof(Counters));
+ if (n > 0) {
+ outCounters->nr = counters.nr;
+ outCounters->time_enabled = counters.time_enabled;
+ outCounters->time_running = counters.time_running;
+ for (size_t i=0 ; i<size_t(EVENT_COUNT) ; i++) {
+ if (mCountersFd[i] >= 0) {
+ outCounters->counters[i] = counters.counters[mIds[i]];
+ }
+ }
+ }
+ }
+
+#else // !__linux__
+
+ void reset() noexcept { }
+ void start() noexcept { }
+ void stop() noexcept { }
+ void readCounters(Counters* counters) noexcept { }
+
+#endif // __linux__
+
+ bool hasBranchRates() const noexcept {
+ return (mCountersFd[BRANCHES] >= 0) && (mCountersFd[BRANCH_MISSES] >= 0);
+ }
+
+ bool hasICacheRates() const noexcept {
+ return (mCountersFd[ICACHE_REFS] >= 0) && (mCountersFd[ICACHE_MISSES] >= 0);
+ }
+
+private:
+ Profiler() noexcept;
+ ~Profiler() noexcept;
+
+ std::array<uint8_t, EVENT_COUNT> mIds;
+ std::array<int, EVENT_COUNT> mCountersFd;
+ uint32_t mEnabledEvents = 0;
+};
+
+} // namespace utils
+
+#endif // TNT_UTILS_PROFILER_H
mov r0, r0 ; \
mov r0, r0 ; \
mov r0, r0 ; \
+ mov r0, r0 ; \
+ mov r0, r0 ; \
+ mov r0, r0 ; \
+ mov r0, r0 ; \
+ mov r0, r0 ; \
+ mov r0, r0 ; \
+ mov r0, r0 ; \
+ mov r0, r0 ; \
beq end_loop ; \
mov r0, r0 ; \
mov r0, r0
mov r0, r0
mov r0, r0
+ mov r0, r0
+ mov r0, r0
+ mov r0, r0
+ mov r0, r0
+ mov r0, r0
+ mov r0, r0
+ mov r0, r0
+ mov r0, r0
end_loop:
subs r0, r0, r1
+++ /dev/null
-/*
- * icache.s
- *
- *
- * Copyright 2005 The Android Open Source Project
- *
- */
-
- .text
- .align
-
- .global icache_test2
- .type icache_test2, %function
-
-#define LOOP \
- mov r0, r0 ; \
- mov r0, r0 ; \
- mov r0, r0 ; \
- mov r0, r0 ; \
- mov r0, r0 ; \
- mov r0, r0 ; \
- mov r0, r0 ; \
- mov r0, r0 ;
-
-
- /*
- * r0 = loop_count
- * r1 = step
- * r2 = mask
- */
-
-icache_test2:
-end_loop:
-
- /* each loop iteration is one cache line
- repeat this block 2048 times... */
-
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
- LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP
-
- subs r0, r0, #1
- bgt end_loop
- bx lr
-
-
+++ /dev/null
-#include <stdio.h>
-#include <sys/time.h>
-
-extern void icache_test(long count, long step);
-extern void icache_test2(long count);
-
-int main()
-{
- printf("[bytes]\t[us]\n");
-
- struct timeval now, tm;
- long long t;
- long MBs;
- long i;
- long step = 32;
- for (i=0 ; step<=2048 ; i++, step+=32)
- {
- long value;
- gettimeofday(&now, 0);
- icache_test(0x800000L, step);
- gettimeofday(&tm, 0);
- t = (tm.tv_sec*1000000LL+tm.tv_usec) - (now.tv_sec*1000000LL+now.tv_usec);
- printf("%6ld\t%lld\n", step*32, t);
- }
-
- gettimeofday(&now, 0);
- icache_test2(0x800000L / 2048);
- gettimeofday(&tm, 0);
- t = (tm.tv_sec*1000000LL+tm.tv_usec) - (now.tv_sec*1000000LL+now.tv_usec);
- MBs = (8388608LL*32*1000000) / (t * (1024*1024));
- printf("\n%6lld us\t%ld MB/s\n", t, MBs);
-
- return 0;
-}
--- /dev/null
+#include <stdio.h>
+#include <sys/time.h>
+#include <getopt.h>
+
+#include <thread>
+#include <iostream>
+#include <iomanip>
+
+#include <sched.h>
+
+#include "Profiler.h"
+
+extern "C" void icache_test(long count, long step);
+
+static constexpr size_t MAX_CODE_SIZE = 128*1024;
+static constexpr size_t CACHE_LINE_SIZE = 64;
+static constexpr size_t MAX_ITERATIONS_COUNT = MAX_CODE_SIZE / CACHE_LINE_SIZE;
+static constexpr size_t REPETITIONS = 0x800000L;
+
+
+using namespace utils;
+
+static cpu_set_t g_cpu_set;
+
+static void printUsage(char* name) {
+ std::string exec_name(name);
+ std::string usage(
+ "ICACHE is a command-line tool for testing the L1 instruction cache performance.\n"
+ "(Make sure security.perf_harden is set to 0)\n\n"
+        "Usage:\n"
+ " ICACHE [options]\n"
+ "\n"
+ "Options:\n"
+ " --help, -h\n"
+ " print this message\n\n"
+ " --affinity=N, -a N\n"
+ " Specify which CPU the test should run on.\n\n"
+ );
+ const std::string from("ICACHE");
+    for (size_t pos = usage.find(from); pos != std::string::npos; pos = usage.find(from, pos + exec_name.length())) {
+ usage.replace(pos, from.length(), exec_name);
+ }
+ printf("%s", usage.c_str());
+}
+
+static int handleCommandLineArguments(int argc, char* argv[]) {
+    static constexpr const char* OPTSTR = "ha:";
+    static const struct option OPTIONS[] = {
+            { "help",                 no_argument, 0, 'h' },
+            { "affinity",       required_argument, 0, 'a' },
+            { 0, 0, 0, 0 }  // termination of the option list
+    };
+    int opt;
+    int option_index = 0;
+    while ((opt = getopt_long(argc, argv, OPTSTR, OPTIONS, &option_index)) >= 0) {
+        std::string arg(optarg ? optarg : "");
+        switch (opt) {
+            default:
+            case 'h':
+                printUsage(argv[0]);
+                exit(0);
+                break;
+            case 'a':
+                size_t cpu = std::stoi(arg);
+                if (cpu < std::thread::hardware_concurrency()) {
+                    CPU_SET(cpu, &g_cpu_set);
+                } else {
+                    std::cerr << "N must be < " << std::thread::hardware_concurrency() << std::endl;
+                    exit(0);
+                }
+                break;
+        }
+    }
+    return optind;
+}
+
+int main(int argc, char* argv[]) {
+    CPU_ZERO(&g_cpu_set);
+
+    [[maybe_unused]] int option_index = handleCommandLineArguments(argc, argv);
+ [[maybe_unused]] int num_args = argc - option_index;
+
+ if (CPU_COUNT(&g_cpu_set)) {
+ sched_setaffinity(gettid(), sizeof(g_cpu_set), &g_cpu_set);
+ }
+
+ Profiler& profiler = Profiler::get();
+ profiler.resetEvents(Profiler::EV_CPU_CYCLES | Profiler::EV_L1I_RATES);
+
+ if (!profiler.isValid()) {
+ fprintf(stderr, "performance counters not enabled. try \"setprop security.perf_harden 0\"\n");
+ exit(0);
+ }
+
+ size_t const stepInBytes = 1024; // 1 KiB steps
+ size_t const step = stepInBytes / CACHE_LINE_SIZE;
+
+ std::cout << std::fixed << std::setprecision(2);
+
+ printf("[KiB]\t[cyc]\t[refs]\t[MPKI]\t[ns]\n");
+
+ Profiler::Counters counters;
+
+ for (size_t i=step ; i <= MAX_ITERATIONS_COUNT ; i += step) {
+ profiler.reset();
+
+ auto now = std::chrono::steady_clock::now();
+ profiler.start();
+ icache_test(REPETITIONS, i);
+ profiler.stop();
+ auto duration = std::chrono::steady_clock::now() - now;
+
+ profiler.readCounters(&counters);
+
+ std::cout << ((i*CACHE_LINE_SIZE)/1024) << "\t"
+ << counters.getCpuCycles()/double(REPETITIONS) << "\t"
+ << counters.getL1IReferences()/double(REPETITIONS) << "\t"
+ << counters.getMPKI(counters.getL1IMisses()) << "\t"
+ << duration.count()/double(REPETITIONS) << "\t"
+ << std::endl;
+ }
+
+ return 0;
+}
" -s --size: Size in kbytes of the data.\n"
" -S --chunk-size: Size of a chunk. Default to size ie 1 chunk.\n"
" Data will be written/read using that chunk size.\n"
- " -D --depth: Depth of directory tree to create for traversal.\n",
+ " -D --depth: Depth of directory tree to create for traversal.\n"
" -i --iterations: Number of time a process should carry its task.\n"
" -p --procnb: Number of processes to use.\n"
" -d --dump: Print the raw timing on stdout.\n"
# Number Slow bitmap uploads: 12
# Number Slow draw: 89
# use with "stdbuf -o0 " to disable pipe buffering
- # stdbuf -o0 adb shell /data/hwuitest shadowgrid2 400 | stdbuf -o0 ./hwuitestfilter.sh | tee t.csv
+ # stdbuf -o0 adb shell /data/local/tmp/hwuimacro shadowgrid2 400 | stdbuf -o0 ./hwuitestfilter.sh | tee t.csv
sed -e 's/ns//' -e 's/[\(\)%]/ /g' | awk '
BEGIN { startTime=0; lastTime=0; }
/^Stats since:/ {
case $DEVICE in
(shamu|hammerhead)
- HWUITEST=hwuitest
+ HWUIMACRO=hwuimacro
onSwipe="700 1847 700 400 50"
;;
(*)
- HWUITEST=hwuitest64
+ HWUIMACRO=hwuimacro64
onSwipe="500 1200 500 550 150"
;;
esac
-scripts="defs.sh systemapps.sh recentfling.sh youtube.sh chromefling.sh $HWUITEST"
+scripts="defs.sh systemapps.sh recentfling.sh youtube.sh chromefling.sh"
if ! $MONSOON >/dev/null 2>&1; then
echo $MONSOON must be in your PATH >&2
echo Copying $scripts to device $devdir...
copy_files
+adb shell ln -s /data/benchmarktest/hwuimacro/$HWUIMACRO $devdir/$HWUIMACRO
tests=""
# measure background power
if [ $shadowgrid2Time -gt 0 ]; then
airplane_mode on
echo $(date) Test 4 : shadowgrid2 for $shadowgrid2Time minutes
- start_job "./$HWUITEST shadowgrid2 100000"
+ start_job "./$HWUIMACRO --onscreen shadowgrid2 100000"
run_test shadowgrid2 $shadowgrid2Time
- cleanup_job shadowgrid2 $HWUITEST
+ cleanup_job shadowgrid2 $HWUIMACRO
airplane_mode off
date
tests="$tests shadowgrid2"