2 * Copyright (C) 2007 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
24 #include <sys/resource.h>
25 #include <sys/syscall.h>
26 #include <sys/types.h>
// Build-time tuning constants used throughout the tests. The same three names
// are defined twice below, so presumably a preprocessor conditional on lines
// not shown here selects exactly one set per target -- TODO confirm.
//   DCACHE_SIZE  : size in bytes used as the cached/uncached threshold and to
//                  build the FAST_SIZES tables of the benchmarks.
//   CPU_FREQ_EST : CPU clock estimate; multiplied by 1000000 in cpufreq_test(),
//                  so apparently expressed in MHz. Also scales the speed
//                  estimates that determine the repeat counts.
//   BRANCH_CYCLE : cycles charged to the loop branch in cpufreq_test().
30 const int DCACHE_SIZE = 8*1024;
31 const int CPU_FREQ_EST = 195;
32 const int BRANCH_CYCLE = 3;
// Alternative set of the same constants (larger cache, higher clock estimate).
34 const int DCACHE_SIZE = 32*1024;
35 const int CPU_FREQ_EST = 384;
36 const int BRANCH_CYCLE = 2;
39 //extern "C" void* xmemcpy(void*, void*, size_t);
// Time value in nanoseconds; signed so that "-start + end" arithmetic works.
42 typedef long long nsecs_t;
// Returns the current CLOCK_MONOTONIC reading converted to nanoseconds.
44 static nsecs_t system_time()
// Clear both fields before the call: the result of clock_gettime() is not
// checked, so a failure yields 0 rather than uninitialized data.
47 t.tv_sec = t.tv_nsec = 0;
48 clock_gettime(CLOCK_MONOTONIC, &t);
// seconds * 1e9 + nanoseconds; the cast to nsecs_t makes the multiply 64-bit.
49 return nsecs_t(t.tv_sec)*1000000000LL + t.tv_nsec;
// Times an empty loop controlled by `count` so the benchmarks can subtract the
// loop cost from their own measurements. The prototype carries
// __attribute__((noinline)) so the function is kept as a real call.
52 nsecs_t loop_overhead(size_t count) __attribute__((noinline));
53 nsecs_t loop_overhead(size_t count)
// Start from the negated start time; adding the end time below leaves the
// elapsed nanoseconds in `overhead`.
55 nsecs_t overhead = -system_time();
// Empty asm with a "memory" clobber: emits no instructions but acts as a
// compiler barrier. Presumably this is the body of the timed loop (the loop
// lines themselves are not visible in this excerpt).
57 asm volatile ("":::"memory");
59 overhead += system_time();
// Walks the first `s` bytes starting at `addr` in 32-byte steps. Callers use
// it to "flush" or "load" the data cache before a timed run. The loop body is
// not visible in this excerpt; presumably it touches addr[i] (addr is
// volatile, so such an access could not be optimized away) -- TODO confirm.
63 static void preload(volatile char* addr, size_t s)
65 for (size_t i=0 ; i<s ; i+=32) {
// Prints the command-line help to stdout. `p` is substituted for the program
// name in the first line of the message.
71 static void usage(char* p) {
72 printf( "Usage: %s <test> <options>\n"
73 "<test> is one of the following:\n"
75 " memcpy [perf [fast] | test]\n"
76 " memset [perf | test]\n"
77 " memcmp [perf | test]\n"
78 " strlen [perf | test]\n"
83 " stack (stack smasher)\n"
// Forward declarations of the per-test entry points dispatched from main().
// main() passes (argc-1, argv+1), so inside each test argv[0] is the test name
// and argv[1] is its first option. The int result is stored by main() in `err`.
88 int cpufreq_test(int argc, char** argv);
89 int memcpy_test(int argc, char** argv);
90 int memset_test(int argc, char** argv);
91 int memcmp_test(int argc, char** argv);
92 int strlen_test(int argc, char** argv);
93 int malloc_test(int argc, char** argv);
94 int madvise_test(int argc, char** argv);
95 int crash_test(int argc, char** argv);
96 int stack_smasher_test(int argc, char** argv);
97 int crawl_test(int argc, char** argv);
// Program entry point: selects a test by comparing argv[1] against the known
// test names and runs it with the argument vector shifted by one, so the test
// sees its own name as argv[0]. The test's return value is kept in `err`.
// NOTE(review): argv[1] is dereferenced here; the argc check that must precede
// it is presumably on the lines not visible in this excerpt -- confirm.
104 int main(int argc, char** argv)
111 if (!strcmp(argv[1], "cpufreq")) err = cpufreq_test(argc-1, argv+1);
112 else if (!strcmp(argv[1], "memcpy")) err = memcpy_test(argc-1, argv+1);
113 else if (!strcmp(argv[1], "memset")) err = memset_test(argc-1, argv+1);
114 else if (!strcmp(argv[1], "memcmp")) err = memcmp_test(argc-1, argv+1);
115 else if (!strcmp(argv[1], "strlen")) err = strlen_test(argc-1, argv+1);
116 else if (!strcmp(argv[1], "malloc")) err = malloc_test(argc-1, argv+1);
117 else if (!strcmp(argv[1], "madvise")) err = madvise_test(argc-1, argv+1);
118 else if (!strcmp(argv[1], "crash")) err = crash_test(argc-1, argv+1);
119 else if (!strcmp(argv[1], "stack")) err = stack_smasher_test(argc-1, argv+1);
120 else if (!strcmp(argv[1], "crawl")) err = crawl_test(argc-1, argv+1);
// Forward declarations of the single-call validators used by the "test" modes
// of memcpy_test() and memset_test(). Callers treat a non-zero result as
// "errors found" (they accumulate it into an error counter).
132 int validate_memcpy(char* s, char* d, size_t size);
133 int validate_memset(char* s, char c, size_t size);
// memcpy benchmark / validation driver.
//   argv[1] == "perf" : measure copy throughput for a range of sizes
//                       (argv[2] == "fast" restricts it to the FAST_SIZES table)
//   argv[1] == "test" : validate memcpy for many sizes and alignments
135 int memcpy_test(int argc, char** argv)
// Select the mode; `option` is declared on a line not visible here.
139 if (!strcmp(argv[1], "perf")) option = 0;
140 else if (!strcmp(argv[1], "test")) option = 1;
144 const int MAX_SIZE = 1024*1024; // largest copy measured: 1MB
// Expected throughput in bytes per second, used only to choose REPEAT.
145 const int CACHED_SPEED_EST = CPU_FREQ_EST*1024*1024; // CPU_FREQ_EST MB/s
146 const int UNCACHED_SPEED_EST = (CPU_FREQ_EST/4)*1024*1024; // CPU_FREQ_EST/4 MB/s
// 44 bytes of slack beyond MAX_SIZE for the alignment tweaks applied below.
// NOTE(review): the malloc() results are passed straight to memset(); no NULL
// check is visible.
147 char* src = (char*)malloc(MAX_SIZE+4+8+32);
148 char* dst = (char*)malloc(MAX_SIZE+4+8+32);
149 memset(src, 0, MAX_SIZE+4+8+32);
150 memset(dst, 0, MAX_SIZE+4+8+32);
153 bool fast = (argc>=3 && !strcmp(argv[2], "fast"));
154 printf("memcpy() performance test is running, please wait...\n");
// Request the most favourable scheduling priority (nice -20) for this process.
157 setpriority(PRIO_PROCESS, 0, -20);
158 static int FAST_SIZES[] = { 1024, DCACHE_SIZE/2, DCACHE_SIZE, DCACHE_SIZE*2, MAX_SIZE };
160 struct result_t { int size; float res; };
// The result table is overlaid on the start of the source buffer instead of
// being allocated separately.
161 result_t* results = (result_t*)src;
// One iteration per measured size; the loop is left from inside (the exit
// statements are on lines not visible here).
164 for (int i=0 ; ; i++) {
// Size progression: steps of 8 below 128, 128 below 1024, 1024 below 16384.
166 if (size<128) size += 8;
167 else if (size<1024) size += 128;
168 else if (size<16384) size += 1024;
// Table-driven progression: stop once the FAST_SIZES table is exhausted.
171 if (size_t(i) >= sizeof(FAST_SIZES)/sizeof(FAST_SIZES[0]))
173 size = FAST_SIZES[i];
175 if (size > MAX_SIZE) {
// Number of copies per measurement: estimated bytes/second divided by the
// copy size, halved.
179 const int REPEAT = (((size < DCACHE_SIZE) ?
180 (CACHED_SPEED_EST) : (UNCACHED_SPEED_EST)) / size) / 2;
181 // i.e. ~0.5 second per test at the estimated speed
183 const nsecs_t overhead = loop_overhead(REPEAT);
185 // tweak to make it a bad case
// Round each pointer up to a 32-byte boundary, then offset it by 4 / 28 bytes
// so source and destination are mutually misaligned. The pointer-to-long cast
// relies on long being wide enough to hold a pointer.
// NOTE(review): sss can lie up to 31+28 bytes past src and `offset` adds up to
// 2 more, while only 44 bytes of slack were allocated; with size == MAX_SIZE
// the copy may read past the end of src -- verify.
186 char* ddd = (char*)((long(dst+31)&~31) + 4);
187 char* sss = (char*)((long(src+31)&~31) + 28);
// Two measurements per size: source offset 0 and source offset 2.
189 for (int offset=0 ; offset<=2 ; offset +=2 ) {
190 memcpy(dst, src, size); // just make sure to load the caches I/D
191 nsecs_t t = -system_time();
192 register int count = REPEAT;
// MEMCPY is not defined in this excerpt; presumably a macro naming the copy
// routine under test.
194 MEMCPY(ddd, sss+offset, size);
// Elapsed time of the copy loop with the empty-loop cost removed.
196 t += system_time() - overhead;
// bytes * 1e9 / (2^20 * nanoseconds) == MB per second.
197 const float throughput = (size*1000000000.0f*REPEAT) / (1024*1024*t);
198 results[nbr].size = size;
199 results[nbr].res = throughput;
// Results were stored in pairs (offset 0, offset 2); print one row per size.
204 printf("%9s %9s %9s\n", "size", "MB/s", "MB/s (nc)");
205 for (int i=0 ; i<nbr ; i+=2) {
206 printf("%9d %9ld %9ld\n", results[i].size, (long)results[i].res, (long)results[i+1].res);
208 } else if (option == 1) {
209 printf("memcpy() validation test is running, please wait...\n");
// Fill the source with a pattern (`c` is produced on a line not visible here)
// that never contains 0x55, the guard value validate_memcpy() writes around
// the destination.
211 char* curr = (char*)src;
212 for (int i=0 ; i<MAX_SIZE ; i++) {
214 *curr++ = c != 0x55 ? c : 0xAA;
// Work 1024 bytes into the buffers so the validator may touch bytes before d.
216 char* s = src + 1024;
217 char* d = dst + 1024;
// Every size 0..4095, with source and/or destination shifted by 1..31 bytes;
// stops at the first reported error.
219 for (int size=0 ; size<4096 && !nb ; size++) {
220 nb += validate_memcpy(s, d, size);
221 for (int o=1 ; o<32 && !nb ; o++) {
222 nb += validate_memcpy(s+o, d, size);
223 nb += validate_memcpy(s, d+o, size);
224 nb += validate_memcpy(s+o, d+o, size);
227 if (nb) printf("%d error(s) found\n", nb);
228 else printf("success!\n");
// Validates a single copy of `size` bytes from `s` to `d`: the destination
// must equal the source and the 4 bytes on either side of it must be
// untouched. The copy itself and the error counting are on lines not visible
// in this excerpt. Requires d[-4 .. size+3] to be writable.
236 int validate_memcpy(char* s, char* d, size_t size)
// Paint the destination and 4 guard bytes on each side with 0x55.
239 memset(d-4, 0x55, size+8);
241 if (memcmp(s,d,size)) {
242 printf("*** memcpy(%p,%p,%lu) destination != source\n",s,d,size);
// Guard bytes after the destination must still hold 0x55.
245 bool r = (d[size]==0x55)&&(d[size+1]==0x55)&&(d[size+2]==0x55)&&(d[size+3]==0x55);
247 printf("*** memcpy(%p,%p,%lu) clobbered past end of destination!\n",s,d,size);
// Guard bytes before the destination must still hold 0x55.
250 r = (d[-1]==0x55)&&(d[-2]==0x55)&&(d[-3]==0x55)&&(d[-4]==0x55);
252 printf("*** memcpy(%p,%p,%lu) clobbered before start of destination!\n",s,d,size);
// memset benchmark / validation driver.
//   argv[1] == "perf" : measure fill throughput for the FAST_SIZES table
//   argv[1] == "test" : validate memset for many sizes, offsets and fill values
264 int memset_test(int argc, char** argv)
// Select the mode; `option` is declared on a line not visible here.
268 if (!strcmp(argv[1], "perf")) option = 0;
269 else if (!strcmp(argv[1], "test")) option = 1;
273 const int MAX_SIZE = 1024*1024; // largest fill measured: 1MB
// Expected throughput in bytes per second, used only to choose REPEAT; the
// same estimate is used for the cached and uncached cases.
274 const int CACHED_SPEED_EST = CPU_FREQ_EST*1024*1024; // CPU_FREQ_EST MB/s
275 const int UNCACHED_SPEED_EST = CPU_FREQ_EST*1024*1024; // CPU_FREQ_EST MB/s
// NOTE(review): no NULL check of the malloc() result is visible.
276 char* dst = (char*)malloc(MAX_SIZE+4+8);
279 printf("memset() performance test is running, please wait...\n");
// Request the most favourable scheduling priority (nice -20) for this process.
282 setpriority(PRIO_PROCESS, 0, -20);
284 static int FAST_SIZES[] = { 1024, DCACHE_SIZE/2, DCACHE_SIZE, DCACHE_SIZE*2, MAX_SIZE };
285 const size_t FAST_SIZES_COUNT = sizeof(FAST_SIZES)/sizeof(FAST_SIZES[0]);
286 struct result_t { int size; float res; };
// Two results per size: one after the cache flush, one after the cache load.
287 result_t results[FAST_SIZES_COUNT*2];
// One iteration per table entry; the loop is left from inside once the table
// is exhausted (exit statements are on lines not visible here).
290 for (int i=0 ; ; i++) {
291 if (size_t(i) >= sizeof(FAST_SIZES)/sizeof(FAST_SIZES[0]))
293 size = FAST_SIZES[i];
294 if (size > MAX_SIZE) {
// Number of fills per measurement: estimated bytes/second divided by the size.
297 const int REPEAT = (((size < DCACHE_SIZE) ?
298 (CACHED_SPEED_EST) : (UNCACHED_SPEED_EST)) / size);
299 // i.e. ~1 second per test at the estimated speed (REPEAT is not halved here)
301 const nsecs_t overhead = loop_overhead(REPEAT);
// j == 0: touch 4x the D-cache size first; j == 1: touch exactly the region
// about to be filled.
303 for (int j=0 ; j<2 ; j++) {
304 if (j==0) preload(dst, DCACHE_SIZE*4); // flush D
305 else preload(dst, size); // load D
306 nsecs_t t = -system_time();
307 size_t count = REPEAT;
309 memset(dst, 0, size);
// Elapsed time of the fill loop with the empty-loop cost removed.
311 t += system_time() - overhead;
// bytes * 1e9 / (2^20 * nanoseconds) == MB per second.
313 const float throughput = (size*1000000000.0f*REPEAT) / (1024*1024*t);
314 results[nbr].size = size;
315 results[nbr].res = throughput;
// Results were stored in pairs (j == 0, j == 1); print one row per size.
320 printf("%9s %9s %9s\n", "size", "MB/s", "MB/s (cached)");
321 for (int i=0 ; i<nbr ; i+=2) {
322 printf("%9d %9ld %9ld\n", results[i].size, (long)results[i].res, (long)results[i+1].res);
324 } else if (option == 1) {
325 printf("memset() validation test is running, please wait...\n");
// Work 1024 bytes into the buffer so the validator may touch bytes before d.
327 char* d = dst + 1024;
// `o` doubles as the fill value and as the destination offset (1..31); for
// each, every size 0..4095 is tried until the first error.
329 for (int o=1 ; o<32 ; o++) {
330 for (int size=0 ; size<4096 && !nb ; size++) {
331 nb += validate_memset(d, char(o), size);
332 nb += validate_memset(d+o, char(o), size);
335 if (nb) printf("%d error(s) found\n", nb);
336 else printf("success!\n");
// Validates a single memset(d, c, size) call.
//
// The destination is first poisoned with 0xaa and guard bytes (0x55) are
// placed at d[-1], d[size] and d[size+1]. After the call every byte of
// d[0..size) must equal `c` and all guard bytes must be intact.
//
// The byte immediately after the destination, d[size], is guarded in
// addition to d[size+1]: checking only d[size+1] lets a one-byte overrun go
// unnoticed.
//
// Parameters:
//   d    - destination; d[-1] .. d[size+1] must be writable
//   c    - fill value passed to memset
//   size - number of bytes to fill (may be 0)
// Returns the number of problems detected (0 on success). Only the first
// mismatching byte inside the destination is reported and counted.
int validate_memset(char* d, char c, size_t size)
{
    const char guard = 0x55;
    const char poison = (char)0xaa;
    int nb = 0;

    // Poison the destination so a memset that writes nothing is detected
    // (unless c happens to equal the poison value).
    for (size_t i = 0; i < size; i++) {
        d[i] = poison;
    }
    d[-1] = guard;
    d[size] = guard;
    d[size+1] = guard;

    memset(d, c, size);

    if (d[size] != guard || d[size+1] != guard) {
        printf("*** memset(%p,%02x,%zu) clobbered past end of destination!\n", (void*)d, (int)c, size);
        nb++;
    }
    if (d[-1] != guard) {
        printf("*** memset(%p,%02x,%zu) clobbered before start of destination!\n", (void*)d, (int)c, size);
        nb++;
    }
    for (size_t i = 0; i < size; i++) {
        if (d[i] != c) {
            printf("*** memset(%p,%02x,%zu) failed at offset %zu\n", (void*)d, (int)c, size, i);
            nb++;
            break;  // one report per call is enough
        }
    }
    return nb;
}
// Reference byte-wise comparison used to cross-check the library memcmp().
// Bytes are read as unsigned char and subtracted as int, so `d` holds the
// signed difference of the byte pair just compared. The surrounding loop and
// the return statement are on lines not visible in this excerpt.
373 static int ref_memcmp(const void *s1, const void *s2, size_t n)
375 const unsigned char *c1 = (const unsigned char *)s1, *c2 = (const unsigned char *)s2;
379 d = (int)*c1++ - (int)*c2++;
// Runs the reference comparison and the library memcmp() on the same inputs
// and reports a failure message showing both results. How `a` and `b` are
// compared (exact value vs. sign only) is on a line not visible here.
387 int validate_memcmp(const char* s, const char* d, size_t size)
// a: expected result from ref_memcmp(); b: result of the memcmp under test.
390 int a = ref_memcmp(s, d, size);
391 int b = memcmp(s, d, size);
392 //printf("%d, %d\n", a, b);
394 printf("*** memcmp(%p,%p,%lu) failed %d should be %d\n",s,d,size,b,a);
// memcmp benchmark / validation driver.
//   argv[1] == "perf" : measure compare throughput for the FAST_SIZES table
//   argv[1] == "test" : cross-check memcmp against ref_memcmp()
400 int memcmp_test(int argc, char** argv)
// Select the mode; `option` is declared on a line not visible here.
404 if (!strcmp(argv[1], "perf")) option = 0;
405 else if (!strcmp(argv[1], "test")) option = 1;
409 const int MAX_SIZE = 1024*1024; // largest compare measured: 1MB
// Expected throughput in bytes per second, used only to choose REPEAT.
410 const int CACHED_SPEED_EST = CPU_FREQ_EST*1024*1024; // CPU_FREQ_EST MB/s
411 const int UNCACHED_SPEED_EST = (CPU_FREQ_EST/4)*1024*1024; // CPU_FREQ_EST/4 MB/s
// 44 bytes of slack beyond MAX_SIZE for the alignment tweaks applied below.
// NOTE(review): no NULL check of the malloc() results is visible.
412 char* src = (char*)malloc(MAX_SIZE+4+8+32);
413 char* dst = (char*)malloc(MAX_SIZE+4+8+32);
416 printf("memcmp() performance test is running, please wait...\n");
// Request the most favourable scheduling priority (nice -20) for this process.
419 setpriority(PRIO_PROCESS, 0, -20);
421 static int FAST_SIZES[] = { 1024, DCACHE_SIZE/2, DCACHE_SIZE, DCACHE_SIZE*2, MAX_SIZE };
423 struct result_t { int size; float res; };
// The result table is overlaid on the start of the source buffer.
424 result_t* results = (result_t*)src;
// One iteration per table entry; the loop is left from inside once the table
// is exhausted (exit statements are on lines not visible here).
427 for (int i=0 ; ; i++) {
428 if (size_t(i) >= sizeof(FAST_SIZES)/sizeof(FAST_SIZES[0]))
430 size = FAST_SIZES[i];
431 if (size > MAX_SIZE) {
// Number of compares per measurement: estimated bytes/second divided by the
// size, halved.
435 const int REPEAT = (((size < DCACHE_SIZE) ?
436 (CACHED_SPEED_EST) : (UNCACHED_SPEED_EST)) / size) / 2;
437 // i.e. ~0.5 second per test at the estimated speed
439 const nsecs_t overhead = loop_overhead(REPEAT);
441 // tweak to make it a bad case
// Round each pointer up to a 32-byte boundary, then offset it by 4 / 28 bytes
// so the two operands are mutually misaligned. The pointer-to-long cast
// relies on long being wide enough to hold a pointer.
// NOTE(review): sss can lie up to 31+28 bytes past src and `offset` adds up to
// 2 more, while only 44 bytes of slack were allocated; with size == MAX_SIZE
// the accesses may run past the end of src -- verify.
442 char* ddd = (char*)((long(dst+31)&~31) + 4);
443 char* sss = (char*)((long(src+31)&~31) + 28);
// Two measurements per size: source offset 0 and source offset 2.
445 for (int offset=0 ; offset<=2 ; offset +=2 ) {
// Make both regions identical so every memcmp has to scan the full length;
// this also warms the caches.
446 memcpy(ddd, sss+offset, size); // just make sure to load the caches I/D
447 nsecs_t t = -system_time();
448 register int count = REPEAT;
450 c = memcmp(ddd, sss+offset, size);
451 //printf("size %d, memcmp -> %d\n", size, (int)c);
453 c = memcmp(ddd, sss+offset, size);
// Feed the result to an empty asm statement so the compiler cannot discard
// the memcmp call as unused.
454 asm volatile (""::"r"(c):"memory");
// Elapsed time of the compare loop with the empty-loop cost removed.
456 t += system_time() - overhead;
// bytes * 1e9 / (2^20 * nanoseconds) == MB per second.
457 const float throughput = (size*1000000000.0f*REPEAT) / (1024*1024*t);
458 results[nbr].size = size;
459 results[nbr].res = throughput;
// Results were stored in pairs (offset 0, offset 2); print one row per size.
464 printf("%9s %9s %9s\n", "size", "MB/s", "MB/s (nc)");
465 for (int i=0 ; i<nbr ; i+=2) {
466 printf("%9d %9ld %9ld\n", results[i].size, (long)results[i].res, (long)results[i+1].res);
469 printf("memcmp() validation test is running, please wait...\n");
// Compare windows start 1024 bytes into each buffer.
472 const char* const s = (const char*)src + 1024;
473 const char* const d = (const char*)dst + 1024;
// For each j in 0..31: refill both buffers (the fill statements are on lines
// not visible here), then invert one source byte at offset j of the window so
// the two windows differ at that position.
475 for (int j=0 ; j<32 ; j++) {
477 char *curr0 = (char*)src;
478 char *curr1 = (char*)dst;
479 for (int i=0 ; i<MAX_SIZE ; i++) {
485 src[1024 + j] ^= 0xFF;
// Sizes 0..31, first with both operands shifted by the same offset 0..3, then
// with only the second operand shifted.
489 for (int size=0 ; size<32 && !nb ; size++) {
490 for (int o=0 ; o<4 ; o++) {
491 nb += validate_memcmp(s+o, d+o, size);
493 // memmove((char*)d+1, d, size);
494 for (int o=0 ; o<4 ; o++) {
495 nb += validate_memcmp(s, d+o, size);
499 if (nb) printf("%d error(s) found\n", nb);
500 else printf("success!\n");
// strlen benchmark driver ("perf" mode is visible here; what the "test"
// option does is on lines not visible in this excerpt).
513 int strlen_test(int argc, char** argv)
// Select the mode; `option` is declared on a line not visible here.
517 if (!strcmp(argv[1], "perf")) option = 0;
518 else if (!strcmp(argv[1], "test")) option = 1;
522 const int MAX_SIZE = 1024*1024; // longest string measured: 1MB
// Expected throughput in bytes per second, used only to choose REPEAT; the
// same estimate is used for the cached and uncached cases.
523 const int CACHED_SPEED_EST = CPU_FREQ_EST*1024*1024; // CPU_FREQ_EST MB/s
524 const int UNCACHED_SPEED_EST = CPU_FREQ_EST*1024*1024; // CPU_FREQ_EST MB/s
// Zero-filled buffer, so whatever prefix is filled below stays NUL-terminated.
// NOTE(review): no NULL check of the calloc() result is visible.
525 char* str = (char*)calloc(MAX_SIZE+4+8, 1);
528 printf("strlen() performance test is running, please wait...\n");
// Request the most favourable scheduling priority (nice -20) for this process.
531 setpriority(PRIO_PROCESS, 0, -20);
533 static int FAST_SIZES[] = { 1024, DCACHE_SIZE/2, DCACHE_SIZE, DCACHE_SIZE*2, MAX_SIZE };
534 const size_t FAST_SIZES_COUNT = sizeof(FAST_SIZES)/sizeof(FAST_SIZES[0]);
535 struct result_t { int size; float res; };
// Two results per size: one after the cache flush, one after the cache load.
536 result_t results[FAST_SIZES_COUNT*2];
// One iteration per table entry; the loop is left from inside once the table
// is exhausted (exit statements are on lines not visible here).
539 for (int i=0 ; ; i++) {
540 if (size_t(i) >= sizeof(FAST_SIZES)/sizeof(FAST_SIZES[0]))
542 size = FAST_SIZES[i];
543 if (size > MAX_SIZE) {
// Number of calls per measurement: estimated bytes/second divided by the size.
546 const int REPEAT = (((size < DCACHE_SIZE) ?
547 (CACHED_SPEED_EST) : (UNCACHED_SPEED_EST)) / size);
548 // i.e. ~1 second per test at the estimated speed (REPEAT is not halved here)
550 const nsecs_t overhead = loop_overhead(REPEAT);
552 for (int j=0 ; j<2 ; j++) {
// Build a string of size-1 'A' characters; the byte after it is still 0 as
// long as sizes only grow from one iteration to the next.
553 memset(str, 'A', size-1);
554 if (j==0) preload(str, DCACHE_SIZE*4); // flush D
555 else preload(str, size); // load D
557 nsecs_t t = -system_time();
558 size_t count = REPEAT;
// `c` is computed on a line not visible here (presumably the strlen result);
// feeding it to an empty asm keeps the compiler from discarding that work.
562 asm volatile (""::"r"(c):"memory");
// Elapsed time of the timed loop with the empty-loop cost removed.
564 t += system_time() - overhead;
// bytes * 1e9 / (2^20 * nanoseconds) == MB per second.
566 const float throughput = (size*1000000000.0f*REPEAT) / (1024*1024*t);
567 results[nbr].size = size;
568 results[nbr].res = throughput;
// Results were stored in pairs (j == 0, j == 1); print one row per size.
573 printf("%9s %9s %9s\n", "size", "MB/s", "MB/s (cached)");
574 for (int i=0 ; i<nbr ; i+=2) {
575 printf("%9d %9ld %9ld\n", results[i].size, (long)results[i].res, (long)results[i+1].res);
// Allocation stress test: repeatedly malloc()s blocks, starting at 1 GB,
// reports each outcome and the running total, and finally prints the total
// amount allocated. With argv[1] == "fill" every block obtained is zeroed.
// The loop structure and the handling of `size` after a failed allocation are
// on lines not visible in this excerpt. No free() is visible.
590 int malloc_test(int argc, char** argv)
592 bool fill = (argc>=2 && !strcmp(argv[1], "fill"));
594 size_t size = 0x40000000;
596 void* addr = malloc(size);
598 printf("size = %9lu failed\n", size);
602 printf("size = %9lu, addr = %p (total = %9lu (%lu MB))\n",
603 size, addr, total, total / (1024*1024));
605 printf("filling...\n");
// Writing to the block forces the memory to be actually committed.
607 memset(addr, 0, size);
// NOTE(review): '+' binds tighter than '>>', so this is (size + size) >> 1,
// which leaves `size` unchanged unless the addition overflows. If growth by
// 1.5x was intended it needs parentheses: size + (size>>1) -- confirm intent.
609 size = size + size>>1;
612 printf("done. allocated %lu MB\n", total / (1024*1024));
// Exercises mmap()/madvise(MADV_DONTNEED) on anonymous private mappings, once
// with a 4 KB region and once with a 48 MB region, printing each step.
621 int madvise_test(int argc, char** argv)
623 for (int i=0 ; i<2 ; i++) {
624 size_t size = i==0 ? 4096 : 48*1024*1024; // 4 KB on the first pass, 48 MB on the second
// For the 4096-byte pass the integer division below prints "0 MB".
625 printf("Allocating %lu MB... ", size/(1024*1024)); fflush(stdout);
626 void* addr1 = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
// NOTE(review): a failed mmap is only reported; on the visible lines the code
// goes on to memset() the returned address -- confirm there is no bail-out.
627 printf("%p (%s)\n", addr1, addr1==(void*)-1 ? "failed" : "OK"); fflush(stdout);
// Dirty every page with the 0x55 pattern.
629 printf("touching %p...\n", addr1); fflush(stdout);
630 memset(addr1, 0x55, size);
632 printf("advising DONTNEED...\n"); fflush(stdout);
633 madvise(addr1, size, MADV_DONTNEED);
// Read the first word back: 0 means the pages were dropped and replaced by
// zero-fill pages; the 0x55 pattern means the contents survived.
635 printf("reading back %p...\n", addr1); fflush(stdout);
636 if (*(long*)addr1 == 0) {
637 printf("madvise freed some pages\n");
// NOTE(review): 0x55555555 only matches the memset pattern when long is 32
// bits; with a 64-bit long an intact page reads 0x5555555555555555 and falls
// through to the "garbage" message.
638 } else if (*(long*)addr1 == 0x55555555) {
639 printf("pages are still there\n");
641 printf("getting garbage back\n");
// Map and dirty a second region of the same size while the first still exists.
644 printf("Allocating %lu MB... ", size/(1024*1024)); fflush(stdout);
645 void* addr2 = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
646 printf("%p (%s)\n", addr2, addr2==(void*)-1 ? "failed" : "OK"); fflush(stdout);
648 printf("touching %p...\n", addr2); fflush(stdout);
649 memset(addr2, 0xAA, size);
// The unmap calls announced by the messages below are on lines not visible
// in this excerpt.
651 printf("unmap %p ...\n", addr2); fflush(stdout);
// Touch the first region again after the advice.
654 printf("touching %p...\n", addr1); fflush(stdout);
655 memset(addr1, 0x55, size);
657 printf("unmap %p ...\n", addr1); fflush(stdout);
661 printf("Done\n"); fflush(stdout);
// Prints the resolution of several POSIX clocks, then estimates the CPU clock
// frequency by timing a loop with an assumed per-iteration cycle count.
670 int cpufreq_test(int argc, char** argv)
// `res` is declared on a line not visible here. The return values of these
// four clock_getres() calls are not checked.
673 clock_getres(CLOCK_REALTIME, &res);
674 printf("CLOCK_REALTIME resolution: %lu ns\n", res.tv_nsec);
675 clock_getres(CLOCK_MONOTONIC, &res);
676 printf("CLOCK_MONOTONIC resolution: %lu ns\n", res.tv_nsec);
677 clock_getres(CLOCK_PROCESS_CPUTIME_ID, &res);
678 printf("CLOCK_PROCESS_CPUTIME_ID resolution: %lu ns\n", res.tv_nsec);
679 clock_getres(CLOCK_THREAD_CPUTIME_ID, &res);
680 printf("CLOCK_THREAD_CPUTIME_ID resolution: %lu ns\n", res.tv_nsec);
// NOTE(review): clock_getres() returns 0 on success, so "!= 0" selects the
// failure case; the resolution is printed exactly when the call failed. The
// test looks inverted (should presumably be "== 0") -- confirm.
682 if (clock_getres(CLOCK_REALTIME_HR, &res) != 0)
683 printf("CLOCK_REALTIME_HR resolution: %lu ns\n", res.tv_nsec);
685 printf("CLOCK_REALTIME_HR not supported\n");
// NOTE(review): same apparently inverted check as for CLOCK_REALTIME_HR.
687 if (clock_getres(CLOCK_MONOTONIC_HR, &res) != 0)
688 printf("CLOCK_MONOTONIC_HR resolution: %lu ns\n", res.tv_nsec);
690 printf("CLOCK_MONOTONIC_HR not supported\n");
692 printf("\nEstimating the CPU frequency, please wait...\n");
// Request the most favourable scheduling priority (nice -20) for this process.
695 setpriority(PRIO_PROCESS, 0, -20);
697 const int LOOP_CYCLES = 1+BRANCH_CYCLE; // 1 cycle + BRANCH_CYCLE cycles for the branch
698 const size_t REPEAT = CPU_FREQ_EST*1000000; // ~LOOP_CYCLES seconds if the estimates hold
699 register size_t count = REPEAT;
700 nsecs_t t = system_time();
701 do { // each iteration is assumed to cost LOOP_CYCLES cycles
// Compiler barrier only: no instructions, but the loop cannot be removed.
702 asm volatile ("":::"memory");
704 t = system_time() - t;
// cycles / nanoseconds gives GHz; the factor 1000 converts to MHz. A zero
// elapsed time yields 0 instead of dividing by zero.
705 const float freq = t ? (1000.0f*float(REPEAT)*LOOP_CYCLES) / t : 0;
// Round to the nearest MHz.
706 printf("this CPU frequency: %ld MHz\n", long(freq+0.5f));
712 #pragma mark crash_test
// Announces an imminent crash. The statement that actually triggers it is on
// a line not visible in this excerpt.
715 int crash_test(int argc, char** argv)
717 printf("about to crash...\n");
// Deliberately corrupts the stack of the running function.
729 int stack_smasher_test(int argc, char** argv)
732 printf("corrupting our stack...\n");
// Store a long long through the address of `dummy` (declared on a line not
// visible here). Presumably `dummy` is smaller than a long long, so the store
// overwrites adjacent stack memory -- TODO confirm. The volatile qualifier
// keeps the compiler from dropping the store.
733 *(volatile long long*)&dummy = 0;
737 // --------------------------------------------------------------------
// C-linkage declarations of a chain of functions. The arm_* functions are
// defined in this file; no definition of the thumb_* functions is visible in
// this excerpt. Going by the names, the chain presumably mixes ARM and Thumb
// code -- TODO confirm.
739 extern "C" void thumb_function_1(int*p);
740 extern "C" void thumb_function_2(int*p);
741 extern "C" void arm_function_3(int*p);
742 extern "C" void arm_function_2(int*p);
743 extern "C" void arm_function_1(int*p);
// Passes the address of `a` (declared on a line not visible here) on to
// thumb_function_2(). `p` is not referenced on the visible lines.
745 void arm_function_3(int*p) {
747 thumb_function_2(&a);
// Passes the address of `a` (declared on a line not visible here) on to
// thumb_function_1(). `p` is not referenced on the visible lines.
750 void arm_function_2(int*p) {
752 thumb_function_1(&a);
// Last function declared in the arm_*/thumb_* chain above. Its body is not
// visible in this excerpt, so what it does with `p` cannot be stated here.
755 void arm_function_1(int*p) {
760 int crawl_test(int argc, char** argv)