2 * Copyright (C) 2013 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #ifndef __BANDWIDTH_H__
18 #define __BANDWIDTH_H__
23 #include "utils/Compat.h"
26 // Bandwidth Class definitions.
27 class BandwidthBenchmark {
31 _num_warm_loops(DEFAULT_NUM_WARM_LOOPS),
32 _num_loops(DEFAULT_NUM_LOOPS) {}
33 virtual ~BandwidthBenchmark() {}
43 bench(_num_warm_loops);
45 nsecs_t t = system_time();
47 t = system_time() - t;
49 _mb_per_sec = (_size*(_num_loops/_BYTES_PER_MB))/(t/_NUM_NS_PER_SEC);
54 bool canRun() { return !usesNeon() || isNeonSupported(); }
56 virtual bool setSize(size_t size) = 0;
58 virtual const char *getName() = 0;
60 virtual bool verify() = 0;
62 virtual bool usesNeon() { return false; }
64 bool isNeonSupported() {
65 #if defined(__ARM_NEON__)
72 // Accessors/mutators.
73 double mb_per_sec() { return _mb_per_sec; }
74 size_t num_warm_loops() { return _num_warm_loops; }
75 size_t num_loops() { return _num_loops; }
76 size_t size() { return _size; }
78 void set_num_warm_loops(size_t num_warm_loops) {
79 _num_warm_loops = num_warm_loops;
81 void set_num_loops(size_t num_loops) { _num_loops = num_loops; }
84 static const unsigned int DEFAULT_NUM_WARM_LOOPS = 1000000;
85 static const unsigned int DEFAULT_NUM_LOOPS = 20000000;
88 virtual void bench(size_t num_loops) = 0;
92 size_t _num_warm_loops;
97 static const CONSTEXPR double _NUM_NS_PER_SEC = 1000000000.0;
98 static const CONSTEXPR double _BYTES_PER_MB = 1024.0* 1024.0;
101 class CopyBandwidthBenchmark : public BandwidthBenchmark {
103 CopyBandwidthBenchmark() : BandwidthBenchmark(), _src(NULL), _dst(NULL) { }
105 bool setSize(size_t size) {
114 _size = DEFAULT_COPY_SIZE;
119 _src = reinterpret_cast<char*>(memalign(64, _size));
121 perror("Failed to allocate memory for test.");
124 _dst = reinterpret_cast<char*>(memalign(64, _size));
126 perror("Failed to allocate memory for test.");
132 virtual ~CopyBandwidthBenchmark() {
144 memset(_src, 0x23, _size);
145 memset(_dst, 0, _size);
147 if (memcmp(_src, _dst, _size) != 0) {
148 printf("Buffers failed to compare after one loop.\n");
152 memset(_src, 0x23, _size);
153 memset(_dst, 0, _size);
156 if (memcmp(_src, _dst, _size) != 0) {
157 printf("Buffers failed to compare after two loops.\n");
168 static const unsigned int DEFAULT_COPY_SIZE = 8000;
171 class CopyLdrdStrdBenchmark : public CopyBandwidthBenchmark {
173 CopyLdrdStrdBenchmark() : CopyBandwidthBenchmark() { }
174 virtual ~CopyLdrdStrdBenchmark() {}
176 const char *getName() { return "ldrd/strd"; }
179 // Copy using ldrd/strd instructions.
180 void bench(size_t num_loops) {
182 "stmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
190 "mov r4, r2, lsr #6\n"
193 "ldrd r6, r7, [r0]\n"
194 "strd r6, r7, [r1]\n"
195 "ldrd r6, r7, [r0, #8]\n"
196 "strd r6, r7, [r1, #8]\n"
197 "ldrd r6, r7, [r0, #16]\n"
198 "strd r6, r7, [r1, #16]\n"
199 "ldrd r6, r7, [r0, #24]\n"
200 "strd r6, r7, [r1, #24]\n"
201 "ldrd r6, r7, [r0, #32]\n"
202 "strd r6, r7, [r1, #32]\n"
203 "ldrd r6, r7, [r0, #40]\n"
204 "strd r6, r7, [r1, #40]\n"
205 "ldrd r6, r7, [r0, #48]\n"
206 "strd r6, r7, [r1, #48]\n"
207 "ldrd r6, r7, [r0, #56]\n"
208 "strd r6, r7, [r1, #56]\n"
220 "ldmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
221 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
225 class CopyLdmiaStmiaBenchmark : public CopyBandwidthBenchmark {
227 CopyLdmiaStmiaBenchmark() : CopyBandwidthBenchmark() { }
228 virtual ~CopyLdmiaStmiaBenchmark() {}
230 const char *getName() { return "ldmia/stmia"; }
233 // Copy using ldmia/stmia instructions.
234 void bench(size_t num_loops) {
236 "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
244 "mov r4, r2, lsr #6\n"
247 "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
248 "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
250 "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
251 "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
259 "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
260 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
264 class CopyVld1Vst1Benchmark : public CopyBandwidthBenchmark {
266 CopyVld1Vst1Benchmark() : CopyBandwidthBenchmark() { }
267 virtual ~CopyVld1Vst1Benchmark() {}
269 const char *getName() { return "vld1/vst1"; }
271 bool usesNeon() { return true; }
274 // Copy using vld1/vst1 instructions.
275 void bench(size_t num_loops) {
276 #if defined(__ARM_NEON__)
278 "stmfd sp!, {r0,r1,r2,r3,r4}\n"
286 "mov r4, r2, lsr #6\n"
289 "vld1.8 {d0-d3}, [r0]!\n"
290 "vld1.8 {d4-d7}, [r0]!\n"
292 "vst1.8 {d0-d3}, [r1:128]!\n"
293 "vst1.8 {d4-d7}, [r1:128]!\n"
301 "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
302 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
307 class CopyVldrVstrBenchmark : public CopyBandwidthBenchmark {
309 CopyVldrVstrBenchmark() : CopyBandwidthBenchmark() { }
310 virtual ~CopyVldrVstrBenchmark() {}
312 const char *getName() { return "vldr/vstr"; }
314 bool usesNeon() { return true; }
317 // Copy using vldr/vstr instructions.
318 void bench(size_t num_loops) {
319 #if defined(__ARM_NEON__)
321 "stmfd sp!, {r0,r1,r2,r3,r4}\n"
329 "mov r4, r2, lsr #6\n"
332 "vldr d0, [r0, #0]\n"
334 "vldr d1, [r0, #8]\n"
335 "vstr d0, [r1, #0]\n"
336 "vldr d0, [r0, #16]\n"
337 "vstr d1, [r1, #8]\n"
338 "vldr d1, [r0, #24]\n"
339 "vstr d0, [r1, #16]\n"
340 "vldr d0, [r0, #32]\n"
341 "vstr d1, [r1, #24]\n"
342 "vldr d1, [r0, #40]\n"
343 "vstr d0, [r1, #32]\n"
344 "vldr d0, [r0, #48]\n"
345 "vstr d1, [r1, #40]\n"
346 "vldr d1, [r0, #56]\n"
347 "vstr d0, [r1, #48]\n"
349 "vstr d1, [r1, #56]\n"
358 "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
359 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
364 class CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark {
366 CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { }
367 virtual ~CopyVldmiaVstmiaBenchmark() {}
369 const char *getName() { return "vldmia/vstmia"; }
371 bool usesNeon() { return true; }
374 // Copy using vldmia/vstmia instructions.
375 void bench(size_t num_loops) {
376 #if defined(__ARM_NEON__)
378 "stmfd sp!, {r0,r1,r2,r3,r4}\n"
386 "mov r4, r2, lsr #6\n"
389 "vldmia r0!, {d0-d7}\n"
391 "vstmia r1!, {d0-d7}\n"
399 "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
400 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
405 class MemcpyBenchmark : public CopyBandwidthBenchmark {
407 MemcpyBenchmark() : CopyBandwidthBenchmark() { }
408 virtual ~MemcpyBenchmark() {}
410 const char *getName() { return "memcpy"; }
413 void bench(size_t num_loops) {
414 for (size_t i = 0; i < num_loops; i++) {
415 memcpy(_dst, _src, _size);
420 class SingleBufferBandwidthBenchmark : public BandwidthBenchmark {
422 SingleBufferBandwidthBenchmark() : BandwidthBenchmark(), _buffer(NULL) { }
423 virtual ~SingleBufferBandwidthBenchmark() {
430 bool setSize(size_t size) {
437 _size = DEFAULT_SINGLE_BUFFER_SIZE;
442 _buffer = reinterpret_cast<char*>(memalign(64, _size));
444 perror("Failed to allocate memory for test.");
447 memset(_buffer, 0, _size);
452 bool verify() { return true; }
457 static const unsigned int DEFAULT_SINGLE_BUFFER_SIZE = 16000;
460 class WriteBandwidthBenchmark : public SingleBufferBandwidthBenchmark {
462 WriteBandwidthBenchmark() : SingleBufferBandwidthBenchmark() { }
463 virtual ~WriteBandwidthBenchmark() { }
466 memset(_buffer, 0, _size);
468 for (size_t i = 0; i < _size; i++) {
469 if (_buffer[i] != 1) {
470 printf("Buffer failed to compare after one loop.\n");
475 memset(_buffer, 0, _size);
477 for (size_t i = 0; i < _size; i++) {
478 if (_buffer[i] != 2) {
479 printf("Buffer failed to compare after two loops.\n");
488 class WriteStrdBenchmark : public WriteBandwidthBenchmark {
490 WriteStrdBenchmark() : WriteBandwidthBenchmark() { }
491 virtual ~WriteStrdBenchmark() {}
493 const char *getName() { return "strd"; }
496 // Write a given value using strd.
497 void bench(size_t num_loops) {
499 "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
509 "mov r3, r1, lsr #5\n"
511 "add r4, r4, #0x01010101\n"
516 "strd r4, r5, [r0]\n"
517 "strd r4, r5, [r0, #8]\n"
518 "strd r4, r5, [r0, #16]\n"
519 "strd r4, r5, [r0, #24]\n"
527 "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
528 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
532 class WriteStmiaBenchmark : public WriteBandwidthBenchmark {
534 WriteStmiaBenchmark() : WriteBandwidthBenchmark() { }
535 virtual ~WriteStmiaBenchmark() {}
537 const char *getName() { return "stmia"; }
540 // Write a given value using stmia.
541 void bench(size_t num_loops) {
543 "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
552 "mov r3, r1, lsr #5\n"
554 "add r4, r4, #0x01010101\n"
565 "stmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
572 "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
573 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
577 class WriteVst1Benchmark : public WriteBandwidthBenchmark {
579 WriteVst1Benchmark() : WriteBandwidthBenchmark() { }
580 virtual ~WriteVst1Benchmark() {}
582 const char *getName() { return "vst1"; }
584 bool usesNeon() { return true; }
587 // Write a given value using vst1.
588 void bench(size_t num_loops) {
589 #if defined(__ARM_NEON__)
591 "stmfd sp!, {r0,r1,r2,r3,r4}\n"
599 "mov r3, r1, lsr #5\n"
609 "vst1.8 {d0-d3}, [r0:128]!\n"
616 "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
617 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
622 class WriteVstrBenchmark : public WriteBandwidthBenchmark {
624 WriteVstrBenchmark() : WriteBandwidthBenchmark() { }
625 virtual ~WriteVstrBenchmark() {}
627 const char *getName() { return "vstr"; }
629 bool usesNeon() { return true; }
632 // Write a given value using vstr.
633 void bench(size_t num_loops) {
634 #if defined(__ARM_NEON__)
636 "stmfd sp!, {r0,r1,r2,r3,r4}\n"
644 "mov r3, r1, lsr #5\n"
653 "vstr d0, [r0, #0]\n"
655 "vstr d1, [r0, #8]\n"
656 "vstr d0, [r0, #16]\n"
657 "vstr d1, [r0, #24]\n"
665 "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
666 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
671 class WriteVstmiaBenchmark : public WriteBandwidthBenchmark {
673 WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { }
674 virtual ~WriteVstmiaBenchmark() {}
676 const char *getName() { return "vstmia"; }
678 bool usesNeon() { return true; }
681 // Write a given value using vstmia.
682 void bench(size_t num_loops) {
683 #if defined(__ARM_NEON__)
685 "stmfd sp!, {r0,r1,r2,r3,r4}\n"
693 "mov r3, r1, lsr #5\n"
703 "vstmia r0!, {d0-d3}\n"
710 "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
711 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
716 class MemsetBenchmark : public WriteBandwidthBenchmark {
718 MemsetBenchmark() : WriteBandwidthBenchmark() { }
719 virtual ~MemsetBenchmark() {}
721 const char *getName() { return "memset"; }
724 void bench(size_t num_loops) {
725 for (size_t i = 0; i < num_loops; i++) {
726 memset(_buffer, (i % 255) + 1, _size);
731 class ReadLdrdBenchmark : public SingleBufferBandwidthBenchmark {
733 ReadLdrdBenchmark() : SingleBufferBandwidthBenchmark() { }
734 virtual ~ReadLdrdBenchmark() {}
736 const char *getName() { return "ldrd"; }
739 // Read using ldrd instructions.
740 void bench(size_t num_loops) {
742 "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
749 "mov r3, r1, lsr #5\n"
753 "ldrd r4, r5, [r0]\n"
754 "ldrd r4, r5, [r0, #8]\n"
755 "ldrd r4, r5, [r0, #16]\n"
756 "ldrd r4, r5, [r0, #24]\n"
764 "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
765 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
769 class ReadLdmiaBenchmark : public SingleBufferBandwidthBenchmark {
771 ReadLdmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
772 virtual ~ReadLdmiaBenchmark() {}
774 const char *getName() { return "ldmia"; }
777 // Read using ldmia instructions.
778 void bench(size_t num_loops) {
780 "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
787 "mov r3, r1, lsr #5\n"
791 "ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
798 "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
799 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
803 class ReadVld1Benchmark : public SingleBufferBandwidthBenchmark {
805 ReadVld1Benchmark() : SingleBufferBandwidthBenchmark() { }
806 virtual ~ReadVld1Benchmark() {}
808 const char *getName() { return "vld1"; }
810 bool usesNeon() { return true; }
813 // Read using vld1 instructions.
814 void bench(size_t num_loops) {
815 #if defined(__ARM_NEON__)
817 "stmfd sp!, {r0,r1,r2,r3}\n"
824 "mov r3, r1, lsr #5\n"
828 "vld1.8 {d0-d3}, [r0:128]!\n"
835 "ldmfd sp!, {r0,r1,r2,r3}\n"
836 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
841 class ReadVldrBenchmark : public SingleBufferBandwidthBenchmark {
843 ReadVldrBenchmark() : SingleBufferBandwidthBenchmark() { }
844 virtual ~ReadVldrBenchmark() {}
846 const char *getName() { return "vldr"; }
848 bool usesNeon() { return true; }
851 // Read using vldr instructions.
852 void bench(size_t num_loops) {
853 #if defined(__ARM_NEON__)
855 "stmfd sp!, {r0,r1,r2,r3}\n"
862 "mov r3, r1, lsr #5\n"
865 "vldr d0, [r0, #0]\n"
867 "vldr d1, [r0, #8]\n"
868 "vldr d0, [r0, #16]\n"
869 "vldr d1, [r0, #24]\n"
877 "ldmfd sp!, {r0,r1,r2,r3}\n"
878 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
884 class ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark {
886 ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
887 virtual ~ReadVldmiaBenchmark() {}
889 const char *getName() { return "vldmia"; }
891 bool usesNeon() { return true; }
894 // Read using vldmia instructions.
895 void bench(size_t num_loops) {
896 #if defined(__ARM_NEON__)
898 "stmfd sp!, {r0,r1,r2,r3}\n"
905 "mov r3, r1, lsr #5\n"
909 "vldmia r0!, {d0-d3}\n"
916 "ldmfd sp!, {r0,r1,r2,r3}\n"
917 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
922 #endif // __BANDWIDTH_H__