2 * Copyright (C) 2013 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #ifndef __BANDWIDTH_H__
18 #define __BANDWIDTH_H__
22 #include "utils/Compat.h"
25 // Bandwidth Class definitions.
// Abstract base for every bandwidth benchmark. It stores the warm-up and timed
// loop counts plus the measured MB/s figure, and declares the hooks
// (setSize/getName/verify/bench) that concrete benchmarks implement.
26 class BandwidthBenchmark {
// Constructor initializers: both loop counts start at their DEFAULT_* values.
30 _num_warm_loops(DEFAULT_NUM_WARM_LOOPS),
31 _num_loops(DEFAULT_NUM_LOOPS) {}
32 virtual ~BandwidthBenchmark() {}
// Warm-up pass; it happens before the first timestamp, so it is not timed.
42 bench(_num_warm_loops);
// Timestamp taken immediately before the measured section.
44 nsecs_t t = system_time();
// t now holds the elapsed time of the measured section.
46 t = system_time() - t;
// MB/s = (_size * _num_loops scaled by bytes-per-MB) / (elapsed time scaled
// by ns-per-second). Both scale factors are doubles, so the arithmetic is
// done in floating point.
48 _mb_per_sec = (_size*(_num_loops/_BYTES_PER_MB))/(t/_NUM_NS_PER_SEC);
// A benchmark can run unless it needs NEON and NEON is not supported.
53 bool canRun() { return !usesNeon() || isNeonSupported(); }
// Hooks supplied by concrete benchmarks.
55 virtual bool setSize(size_t size) = 0;
57 virtual const char *getName() = 0;
59 virtual bool verify() = 0;
// Defaults to false; NEON-based benchmarks override this to return true.
61 virtual bool usesNeon() { return false; }
// Answer is conditioned on the __ARM_NEON__ compile-time macro.
63 bool isNeonSupported() {
64 #if defined(__ARM_NEON__)
71 // Accessors/mutators.
72 double mb_per_sec() { return _mb_per_sec; }
73 size_t num_warm_loops() { return _num_warm_loops; }
74 size_t num_loops() { return _num_loops; }
75 size_t size() { return _size; }
77 void set_num_warm_loops(size_t num_warm_loops) {
78 _num_warm_loops = num_warm_loops;
80 void set_num_loops(size_t num_loops) { _num_loops = num_loops; }
// Loop counts used until the set_num_* mutators override them.
83 static const unsigned int DEFAULT_NUM_WARM_LOOPS = 1000000;
84 static const unsigned int DEFAULT_NUM_LOOPS = 20000000;
// The benchmark kernel itself: performs the operation num_loops times.
87 virtual void bench(size_t num_loops) = 0;
91 size_t _num_warm_loops;
// Unit-conversion factors used by the MB/s computation above.
96 static const CONSTEXPR double _NUM_NS_PER_SEC = 1000000000.0;
97 static const CONSTEXPR double _BYTES_PER_MB = 1024.0* 1024.0;
// Base class for the copy benchmarks. It owns a source buffer (_src) and a
// destination buffer (_dst), each _size bytes long, and provides the
// verification that compares them after the copy kernel has run.
100 class CopyBandwidthBenchmark : public BandwidthBenchmark {
102 CopyBandwidthBenchmark() : BandwidthBenchmark(), _src(NULL), _dst(NULL) { }
// Chooses the buffer size and allocates both buffers; allocation failures
// are reported through perror().
104 bool setSize(size_t size) {
// Fall back to the default copy size.
// NOTE(review): presumably taken when the caller passes size == 0 -- confirm.
113 _size = DEFAULT_COPY_SIZE;
// Both buffers are allocated with 64-byte alignment.
118 _src = reinterpret_cast<char*>(memalign(64, _size));
120 perror("Failed to allocate memory for test.");
123 _dst = reinterpret_cast<char*>(memalign(64, _size));
125 perror("Failed to allocate memory for test.");
131 virtual ~CopyBandwidthBenchmark() {
// Verification, first pass: give the buffers different contents (0x23 vs 0),
// so they can only compare equal if data was actually copied.
143 memset(_src, 0x23, _size);
144 memset(_dst, 0, _size);
146 if (memcmp(_src, _dst, _size) != 0) {
147 printf("Buffers failed to compare after one loop.\n");
// Second pass: reset both buffers and repeat the check for two loops.
151 memset(_src, 0x23, _size);
152 memset(_dst, 0, _size);
155 if (memcmp(_src, _dst, _size) != 0) {
156 printf("Buffers failed to compare after two loops.\n");
// Default buffer size in bytes; 8000 is an exact multiple of the 64-byte
// chunk the assembly kernels process per inner iteration.
167 static const unsigned int DEFAULT_COPY_SIZE = 8000;
// Copy benchmark whose kernel moves data with ldrd/strd, i.e. one 8-byte
// doubleword load followed by one 8-byte doubleword store.
170 class CopyLdrdStrdBenchmark : public CopyBandwidthBenchmark {
172 CopyLdrdStrdBenchmark() : CopyBandwidthBenchmark() { }
173 virtual ~CopyLdrdStrdBenchmark() {}
175 const char *getName() { return "ldrd/strd"; }
178 // Copy using ldrd/strd instructions.
179 void bench(size_t num_loops) {
// Save every core register the kernel uses; the matching ldmfd restores them.
181 "stmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
// r4 = r2 >> 6: the number of 64-byte chunks.
// NOTE(review): assumes r2 holds _size at this point -- confirm.
189 "mov r4, r2, lsr #6\n"
// One chunk: eight ldrd/strd pairs at offsets 0..56 copy 64 bytes from the
// address in r0 to the address in r1, staging each doubleword in r6:r7.
192 "ldrd r6, r7, [r0]\n"
193 "strd r6, r7, [r1]\n"
194 "ldrd r6, r7, [r0, #8]\n"
195 "strd r6, r7, [r1, #8]\n"
196 "ldrd r6, r7, [r0, #16]\n"
197 "strd r6, r7, [r1, #16]\n"
198 "ldrd r6, r7, [r0, #24]\n"
199 "strd r6, r7, [r1, #24]\n"
200 "ldrd r6, r7, [r0, #32]\n"
201 "strd r6, r7, [r1, #32]\n"
202 "ldrd r6, r7, [r0, #40]\n"
203 "strd r6, r7, [r1, #40]\n"
204 "ldrd r6, r7, [r0, #48]\n"
205 "strd r6, r7, [r1, #48]\n"
206 "ldrd r6, r7, [r0, #56]\n"
207 "strd r6, r7, [r1, #56]\n"
219 "ldmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
// Inputs: %0 = _src, %1 = _dst, %2 = _size, %3 = num_loops. Only r0-r3 are
// listed as clobbers; r4/r6/r7 rely on the explicit save/restore above.
// NOTE(review): no "memory" clobber is declared although the kernel writes
// to _dst -- confirm this is intentional.
220 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
// Copy benchmark whose kernel moves data with ldmia/stmia, transferring
// eight 32-bit registers (32 bytes) per instruction.
224 class CopyLdmiaStmiaBenchmark : public CopyBandwidthBenchmark {
226 CopyLdmiaStmiaBenchmark() : CopyBandwidthBenchmark() { }
227 virtual ~CopyLdmiaStmiaBenchmark() {}
229 const char *getName() { return "ldmia/stmia"; }
232 // Copy using ldmia/stmia instructions.
233 void bench(size_t num_loops) {
// Save r0-r12; the matching ldmfd restores them.
235 "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
// r4 = r2 >> 6: the number of 64-byte chunks.
// NOTE(review): assumes r2 holds _size at this point -- confirm.
243 "mov r4, r2, lsr #6\n"
// One chunk: two load/store pairs of eight registers each (2 x 32 bytes).
// The '!' writeback advances r0 and r1 past the data just transferred.
246 "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
247 "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
249 "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
250 "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
258 "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
// Inputs: %0 = _src, %1 = _dst, %2 = _size, %3 = num_loops. Only r0-r3 are
// listed as clobbers; r4-r12 rely on the explicit save/restore above.
259 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
// Copy benchmark whose kernel moves data with the NEON vld1/vst1
// instructions. Reports usesNeon() == true, so canRun() gates it on NEON
// support.
263 class CopyVld1Vst1Benchmark : public CopyBandwidthBenchmark {
265 CopyVld1Vst1Benchmark() : CopyBandwidthBenchmark() { }
266 virtual ~CopyVld1Vst1Benchmark() {}
268 const char *getName() { return "vld1/vst1"; }
270 bool usesNeon() { return true; }
273 // Copy using vld1/vst1 instructions.
274 void bench(size_t num_loops) {
// The kernel is only compiled when the toolchain defines __ARM_NEON__.
275 #if defined(__ARM_NEON__)
// Save the core registers the kernel uses; the matching ldmfd restores them.
277 "stmfd sp!, {r0,r1,r2,r3,r4}\n"
// r4 = r2 >> 6: the number of 64-byte chunks.
// NOTE(review): assumes r2 holds _size at this point -- confirm.
285 "mov r4, r2, lsr #6\n"
// One chunk: load 2 x 32 bytes into d0-d7 with post-increment of r0 ...
288 "vld1.8 {d0-d3}, [r0]!\n"
289 "vld1.8 {d4-d7}, [r0]!\n"
// ... then store them with post-increment of r1. The ':128' qualifier
// promises a 128-bit aligned destination address; the loads carry no such
// alignment hint.
291 "vst1.8 {d0-d3}, [r1:128]!\n"
292 "vst1.8 {d4-d7}, [r1:128]!\n"
300 "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
// Inputs: %0 = _src, %1 = _dst, %2 = _size, %3 = num_loops.
// NOTE(review): the NEON registers d0-d7 are not in the clobber list --
// confirm this is intentional.
301 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
// Copy benchmark whose kernel moves data with vldr/vstr, one 8-byte
// D register per instruction. Reports usesNeon() == true.
306 class CopyVldrVstrBenchmark : public CopyBandwidthBenchmark {
308 CopyVldrVstrBenchmark() : CopyBandwidthBenchmark() { }
309 virtual ~CopyVldrVstrBenchmark() {}
311 const char *getName() { return "vldr/vstr"; }
313 bool usesNeon() { return true; }
316 // Copy using vldr/vstr instructions.
317 void bench(size_t num_loops) {
// The kernel is only compiled when the toolchain defines __ARM_NEON__.
318 #if defined(__ARM_NEON__)
// Save the core registers the kernel uses; the matching ldmfd restores them.
320 "stmfd sp!, {r0,r1,r2,r3,r4}\n"
// r4 = r2 >> 6: the number of 64-byte chunks.
// NOTE(review): assumes r2 holds _size at this point -- confirm.
328 "mov r4, r2, lsr #6\n"
// One chunk: 64 bytes copied through d0 and d1 alternately. Each store is
// issued one instruction after the next load, so the store of one register
// sits between loads of the other.
331 "vldr d0, [r0, #0]\n"
333 "vldr d1, [r0, #8]\n"
334 "vstr d0, [r1, #0]\n"
335 "vldr d0, [r0, #16]\n"
336 "vstr d1, [r1, #8]\n"
337 "vldr d1, [r0, #24]\n"
338 "vstr d0, [r1, #16]\n"
339 "vldr d0, [r0, #32]\n"
340 "vstr d1, [r1, #24]\n"
341 "vldr d1, [r0, #40]\n"
342 "vstr d0, [r1, #32]\n"
343 "vldr d0, [r0, #48]\n"
344 "vstr d1, [r1, #40]\n"
345 "vldr d1, [r0, #56]\n"
346 "vstr d0, [r1, #48]\n"
348 "vstr d1, [r1, #56]\n"
357 "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
// Inputs: %0 = _src, %1 = _dst, %2 = _size, %3 = num_loops.
358 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
// Copy benchmark whose kernel moves data with vldmia/vstmia, transferring
// eight D registers (64 bytes) per instruction. Reports usesNeon() == true.
363 class CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark {
365 CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { }
366 virtual ~CopyVldmiaVstmiaBenchmark() {}
368 const char *getName() { return "vldmia/vstmia"; }
370 bool usesNeon() { return true; }
373 // Copy using vldmia/vstmia instructions.
374 void bench(size_t num_loops) {
// The kernel is only compiled when the toolchain defines __ARM_NEON__.
375 #if defined(__ARM_NEON__)
// Save the core registers the kernel uses; the matching ldmfd restores them.
377 "stmfd sp!, {r0,r1,r2,r3,r4}\n"
// r4 = r2 >> 6: the number of 64-byte chunks.
// NOTE(review): assumes r2 holds _size at this point -- confirm.
385 "mov r4, r2, lsr #6\n"
// One chunk: a single 64-byte load into d0-d7 and a single 64-byte store,
// each advancing its base register through '!' writeback.
388 "vldmia r0!, {d0-d7}\n"
390 "vstmia r1!, {d0-d7}\n"
398 "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
// Inputs: %0 = _src, %1 = _dst, %2 = _size, %3 = num_loops.
399 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
// Copy benchmark that measures the C library's memcpy() instead of a
// hand-written assembly kernel.
404 class MemcpyBenchmark : public CopyBandwidthBenchmark {
406 MemcpyBenchmark() : CopyBandwidthBenchmark() { }
407 virtual ~MemcpyBenchmark() {}
409 const char *getName() { return "memcpy"; }
// Copies the whole _size-byte source buffer into the destination buffer
// once per loop iteration.
412 void bench(size_t num_loops) {
413 for (size_t i = 0; i < num_loops; i++) {
414 memcpy(_dst, _src, _size);
// Base class for benchmarks that operate on one buffer (the read-only and
// write-only kernels). Owns _buffer, which is _size bytes long.
419 class SingleBufferBandwidthBenchmark : public BandwidthBenchmark {
421 SingleBufferBandwidthBenchmark() : BandwidthBenchmark(), _buffer(NULL) { }
422 virtual ~SingleBufferBandwidthBenchmark() {
// Chooses the buffer size, allocates the buffer and zero-fills it; an
// allocation failure is reported through perror().
429 bool setSize(size_t size) {
// Fall back to the default buffer size.
// NOTE(review): presumably taken when the caller passes size == 0 -- confirm.
436 _size = DEFAULT_SINGLE_BUFFER_SIZE;
// The buffer is allocated with 64-byte alignment.
441 _buffer = reinterpret_cast<char*>(memalign(64, _size));
443 perror("Failed to allocate memory for test.");
446 memset(_buffer, 0, _size);
// Always succeeds at this level; WriteBandwidthBenchmark replaces it with a
// real content check.
451 bool verify() { return true; }
// Default buffer size in bytes; 16000 is an exact multiple of the 32-byte
// chunk the single-buffer assembly kernels process per inner iteration.
456 static const unsigned int DEFAULT_SINGLE_BUFFER_SIZE = 16000;
// Base class for the write benchmarks. Its verification checks the byte
// value left in the buffer: every byte must be 1 after one loop and 2 after
// two loops.
459 class WriteBandwidthBenchmark : public SingleBufferBandwidthBenchmark {
461 WriteBandwidthBenchmark() : SingleBufferBandwidthBenchmark() { }
462 virtual ~WriteBandwidthBenchmark() { }
// First pass: start from an all-zero buffer, then require every byte to be 1.
465 memset(_buffer, 0, _size);
467 for (size_t i = 0; i < _size; i++) {
468 if (_buffer[i] != 1) {
469 printf("Buffer failed to compare after one loop.\n");
// Second pass: zero the buffer again, then require every byte to be 2.
474 memset(_buffer, 0, _size);
476 for (size_t i = 0; i < _size; i++) {
477 if (_buffer[i] != 2) {
478 printf("Buffer failed to compare after two loops.\n");
// Write benchmark whose kernel fills the buffer with strd, storing the
// 8-byte register pair r4:r5 per instruction.
487 class WriteStrdBenchmark : public WriteBandwidthBenchmark {
489 WriteStrdBenchmark() : WriteBandwidthBenchmark() { }
490 virtual ~WriteStrdBenchmark() {}
492 const char *getName() { return "strd"; }
495 // Write a given value using strd.
496 void bench(size_t num_loops) {
// Save every core register the kernel uses; the matching ldmfd restores them.
498 "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
// r3 = r1 >> 5: the number of 32-byte chunks.
// NOTE(review): assumes r1 holds _size at this point -- confirm.
508 "mov r3, r1, lsr #5\n"
// Adds 1 to each of the four bytes of the fill word in r4.
// NOTE(review): 0x01010101 is not encodable as an ARM-mode immediate (it is
// as a Thumb-2 modified immediate) -- confirm the intended instruction set.
510 "add r4, r4, #0x01010101\n"
// One chunk: four doubleword stores at offsets 0..24 write 32 bytes.
515 "strd r4, r5, [r0]\n"
516 "strd r4, r5, [r0, #8]\n"
517 "strd r4, r5, [r0, #16]\n"
518 "strd r4, r5, [r0, #24]\n"
526 "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
// Inputs: %0 = _buffer, %1 = _size, %2 = num_loops. Only r0-r2 are listed as
// clobbers; r3-r5 rely on the explicit save/restore above.
527 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Write benchmark whose kernel fills the buffer with stmia, storing eight
// 32-bit registers (32 bytes) per instruction.
531 class WriteStmiaBenchmark : public WriteBandwidthBenchmark {
533 WriteStmiaBenchmark() : WriteBandwidthBenchmark() { }
534 virtual ~WriteStmiaBenchmark() {}
536 const char *getName() { return "stmia"; }
539 // Write a given value using stmia.
540 void bench(size_t num_loops) {
// Save r0-r11; the matching ldmfd restores them.
542 "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
// r3 = r1 >> 5: the number of 32-byte chunks.
// NOTE(review): assumes r1 holds _size at this point -- confirm.
551 "mov r3, r1, lsr #5\n"
// Adds 1 to each of the four bytes of the fill word in r4.
553 "add r4, r4, #0x01010101\n"
// One chunk: a single store of r4-r11 (32 bytes); the '!' writeback advances
// r0 past the data just written.
564 "stmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
571 "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
// Inputs: %0 = _buffer, %1 = _size, %2 = num_loops. Only r0-r2 are listed as
// clobbers; r3-r11 rely on the explicit save/restore above.
572 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Write benchmark whose kernel fills the buffer with the NEON vst1
// instruction. Reports usesNeon() == true, so canRun() gates it on NEON
// support.
576 class WriteVst1Benchmark : public WriteBandwidthBenchmark {
578 WriteVst1Benchmark() : WriteBandwidthBenchmark() { }
579 virtual ~WriteVst1Benchmark() {}
581 const char *getName() { return "vst1"; }
583 bool usesNeon() { return true; }
586 // Write a given value using vst.
587 void bench(size_t num_loops) {
// The kernel is only compiled when the toolchain defines __ARM_NEON__.
588 #if defined(__ARM_NEON__)
// Save the core registers the kernel uses; the matching ldmfd restores them.
590 "stmfd sp!, {r0,r1,r2,r3,r4}\n"
// r3 = r1 >> 5: the number of 32-byte chunks.
// NOTE(review): assumes r1 holds _size at this point -- confirm.
598 "mov r3, r1, lsr #5\n"
// One chunk: store d0-d3 (32 bytes) with post-increment of r0. The ':128'
// qualifier promises a 128-bit aligned address.
608 "vst1.8 {d0-d3}, [r0:128]!\n"
615 "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
// Inputs: %0 = _buffer, %1 = _size, %2 = num_loops.
616 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Write benchmark whose kernel fills the buffer with vstr, storing one
// 8-byte D register per instruction. Reports usesNeon() == true.
621 class WriteVstrBenchmark : public WriteBandwidthBenchmark {
623 WriteVstrBenchmark() : WriteBandwidthBenchmark() { }
624 virtual ~WriteVstrBenchmark() {}
626 const char *getName() { return "vstr"; }
628 bool usesNeon() { return true; }
631 // Write a given value using vstr.
632 void bench(size_t num_loops) {
// The kernel is only compiled when the toolchain defines __ARM_NEON__.
633 #if defined(__ARM_NEON__)
// Save the core registers the kernel uses; the matching ldmfd restores them.
635 "stmfd sp!, {r0,r1,r2,r3,r4}\n"
// r3 = r1 >> 5: the number of 32-byte chunks.
// NOTE(review): assumes r1 holds _size at this point -- confirm.
643 "mov r3, r1, lsr #5\n"
// One chunk: four 8-byte stores at offsets 0..24, alternating d0 and d1.
652 "vstr d0, [r0, #0]\n"
654 "vstr d1, [r0, #8]\n"
655 "vstr d0, [r0, #16]\n"
656 "vstr d1, [r0, #24]\n"
664 "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
// Inputs: %0 = _buffer, %1 = _size, %2 = num_loops.
665 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Write benchmark whose kernel fills the buffer with vstmia, storing four
// D registers (32 bytes) per instruction. Reports usesNeon() == true.
670 class WriteVstmiaBenchmark : public WriteBandwidthBenchmark {
672 WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { }
673 virtual ~WriteVstmiaBenchmark() {}
675 const char *getName() { return "vstmia"; }
677 bool usesNeon() { return true; }
680 // Write a given value using vstmia.
681 void bench(size_t num_loops) {
// The kernel is only compiled when the toolchain defines __ARM_NEON__.
682 #if defined(__ARM_NEON__)
// Save the core registers the kernel uses; the matching ldmfd restores them.
684 "stmfd sp!, {r0,r1,r2,r3,r4}\n"
// r3 = r1 >> 5: the number of 32-byte chunks.
// NOTE(review): assumes r1 holds _size at this point -- confirm.
692 "mov r3, r1, lsr #5\n"
// One chunk: a single store of d0-d3 (32 bytes); the '!' writeback advances
// r0 past the data just written.
702 "vstmia r0!, {d0-d3}\n"
709 "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
// Inputs: %0 = _buffer, %1 = _size, %2 = num_loops.
710 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Write benchmark that measures the C library's memset() instead of a
// hand-written assembly kernel.
715 class MemsetBenchmark : public WriteBandwidthBenchmark {
717 MemsetBenchmark() : WriteBandwidthBenchmark() { }
718 virtual ~MemsetBenchmark() {}
720 const char *getName() { return "memset"; }
// Fills the whole buffer once per loop iteration.
723 void bench(size_t num_loops) {
724 for (size_t i = 0; i < num_loops; i++) {
// The fill value cycles through 1..255 and is never 0: iteration 0 writes 1
// and iteration 1 writes 2, which is what the one-loop and two-loop checks
// in WriteBandwidthBenchmark expect.
725 memset(_buffer, (i % 255) + 1, _size);
// Read benchmark whose kernel reads the buffer with ldrd, loading 8 bytes
// into the register pair r4:r5 per instruction. It derives from
// SingleBufferBandwidthBenchmark, whose verify() always returns true.
730 class ReadLdrdBenchmark : public SingleBufferBandwidthBenchmark {
732 ReadLdrdBenchmark() : SingleBufferBandwidthBenchmark() { }
733 virtual ~ReadLdrdBenchmark() {}
735 const char *getName() { return "ldrd"; }
738 // Read the buffer using ldrd.
739 void bench(size_t num_loops) {
// Save every core register the kernel uses; the matching ldmfd restores them.
741 "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
// r3 = r1 >> 5: the number of 32-byte chunks.
// NOTE(review): assumes r1 holds _size at this point -- confirm.
748 "mov r3, r1, lsr #5\n"
// One chunk: four doubleword loads at offsets 0..24 read 32 bytes. Every
// load targets r4:r5, so each one overwrites the previous result.
752 "ldrd r4, r5, [r0]\n"
753 "ldrd r4, r5, [r0, #8]\n"
754 "ldrd r4, r5, [r0, #16]\n"
755 "ldrd r4, r5, [r0, #24]\n"
763 "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
// Inputs: %0 = _buffer, %1 = _size, %2 = num_loops. Only r0-r2 are listed as
// clobbers; r3-r5 rely on the explicit save/restore above.
764 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Read benchmark whose kernel reads the buffer with ldmia, loading eight
// 32-bit registers (32 bytes) per instruction.
768 class ReadLdmiaBenchmark : public SingleBufferBandwidthBenchmark {
770 ReadLdmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
771 virtual ~ReadLdmiaBenchmark() {}
773 const char *getName() { return "ldmia"; }
776 // Read the buffer using ldmia.
777 void bench(size_t num_loops) {
// Save r0-r11; the matching ldmfd restores them.
779 "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
// r3 = r1 >> 5: the number of 32-byte chunks.
// NOTE(review): assumes r1 holds _size at this point -- confirm.
786 "mov r3, r1, lsr #5\n"
// One chunk: a single load into r4-r11 (32 bytes); the '!' writeback
// advances r0 past the data just read.
790 "ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
797 "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
// Inputs: %0 = _buffer, %1 = _size, %2 = num_loops. Only r0-r2 are listed as
// clobbers; r3-r11 rely on the explicit save/restore above.
798 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Read benchmark whose kernel reads the buffer with the NEON vld1
// instruction. Reports usesNeon() == true, so canRun() gates it on NEON
// support.
802 class ReadVld1Benchmark : public SingleBufferBandwidthBenchmark {
804 ReadVld1Benchmark() : SingleBufferBandwidthBenchmark() { }
805 virtual ~ReadVld1Benchmark() {}
807 const char *getName() { return "vld1"; }
809 bool usesNeon() { return true; }
812 // Read the buffer using vld1.
813 void bench(size_t num_loops) {
// The kernel is only compiled when the toolchain defines __ARM_NEON__.
814 #if defined(__ARM_NEON__)
// Save the core registers the kernel uses; the matching ldmfd restores them.
816 "stmfd sp!, {r0,r1,r2,r3}\n"
// r3 = r1 >> 5: the number of 32-byte chunks.
// NOTE(review): assumes r1 holds _size at this point -- confirm.
823 "mov r3, r1, lsr #5\n"
// One chunk: load 32 bytes into d0-d3 with post-increment of r0. The ':128'
// qualifier promises a 128-bit aligned address.
827 "vld1.8 {d0-d3}, [r0:128]!\n"
834 "ldmfd sp!, {r0,r1,r2,r3}\n"
// Inputs: %0 = _buffer, %1 = _size, %2 = num_loops.
835 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Read benchmark whose kernel reads the buffer with vldr, loading one
// 8-byte D register per instruction. Reports usesNeon() == true.
840 class ReadVldrBenchmark : public SingleBufferBandwidthBenchmark {
842 ReadVldrBenchmark() : SingleBufferBandwidthBenchmark() { }
843 virtual ~ReadVldrBenchmark() {}
845 const char *getName() { return "vldr"; }
847 bool usesNeon() { return true; }
850 // Read the buffer using vldr.
851 void bench(size_t num_loops) {
// The kernel is only compiled when the toolchain defines __ARM_NEON__.
852 #if defined(__ARM_NEON__)
// Save the core registers the kernel uses; the matching ldmfd restores them.
854 "stmfd sp!, {r0,r1,r2,r3}\n"
// r3 = r1 >> 5: the number of 32-byte chunks.
// NOTE(review): assumes r1 holds _size at this point -- confirm.
861 "mov r3, r1, lsr #5\n"
// One chunk: four 8-byte loads at offsets 0..24, alternating d0 and d1.
864 "vldr d0, [r0, #0]\n"
866 "vldr d1, [r0, #8]\n"
867 "vldr d0, [r0, #16]\n"
868 "vldr d1, [r0, #24]\n"
876 "ldmfd sp!, {r0,r1,r2,r3}\n"
// Inputs: %0 = _buffer, %1 = _size, %2 = num_loops.
877 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Read benchmark whose kernel reads the buffer with vldmia, loading four
// D registers (32 bytes) per instruction. Reports usesNeon() == true.
883 class ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark {
885 ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
886 virtual ~ReadVldmiaBenchmark() {}
888 const char *getName() { return "vldmia"; }
890 bool usesNeon() { return true; }
893 // Read the buffer using vldmia.
894 void bench(size_t num_loops) {
// The kernel is only compiled when the toolchain defines __ARM_NEON__.
895 #if defined(__ARM_NEON__)
// Save the core registers the kernel uses; the matching ldmfd restores them.
897 "stmfd sp!, {r0,r1,r2,r3}\n"
// r3 = r1 >> 5: the number of 32-byte chunks.
// NOTE(review): assumes r1 holds _size at this point -- confirm.
904 "mov r3, r1, lsr #5\n"
// One chunk: a single load into d0-d3 (32 bytes); the '!' writeback advances
// r0 past the data just read.
908 "vldmia r0!, {d0-d3}\n"
915 "ldmfd sp!, {r0,r1,r2,r3}\n"
// Inputs: %0 = _buffer, %1 = _size, %2 = num_loops.
916 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
921 #endif // __BANDWIDTH_H__