2 * Copyright (C) 2013 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #ifndef __BANDWIDTH_H__
18 #define __BANDWIDTH_H__
22 // Bandwidth Class definitions.
// Abstract base class for the memory bandwidth benchmarks in this header.
// It holds the warm-up and measured loop counts, declares the hooks each
// benchmark implements (setSize/getName/verify/bench), and exposes the
// computed MB/s figure through mb_per_sec().
// NOTE(review): several lines of this class (including the constructor
// header and the method that wraps the timing code) are not visible in
// this view; the comments describe only what is shown.
23 class BandwidthBenchmark {
// Both loop counts start from the DEFAULT_* constants declared further down.
27 _num_warm_loops(DEFAULT_NUM_WARM_LOOPS),
28 _num_loops(DEFAULT_NUM_LOOPS) {}
29 virtual ~BandwidthBenchmark() {}
// Warm-up pass: bench() is run for _num_warm_loops iterations before the
// timestamps below are taken.
39 bench(_num_warm_loops);
// Elapsed time in the units returned by system_time(); the formula below
// treats it as nanoseconds. The measured call between these two lines is
// presumably bench(_num_loops) -- it is not visible here, TODO confirm.
41 nsecs_t t = system_time();
43 t = system_time() - t;
// MB/s = (_size * _num_loops / bytes-per-MB) / (elapsed ns / ns-per-second).
// NOTE(review): if t is 0 the divisor is 0.0 and the result is inf or NaN;
// confirm the loop counts make a zero interval impossible in practice.
45 _mb_per_sec = (_size*(_num_loops/_BYTES_PER_MB))/(t/_NUM_NS_PER_SEC);
// A benchmark can run if it does not use NEON, or if NEON is supported.
50 bool canRun() { return !usesNeon() || isNeonSupported(); }
// Hooks implemented by the concrete benchmarks.
52 virtual bool setSize(size_t size) = 0;
54 virtual const char *getName() = 0;
56 virtual bool verify() = 0;
// Defaults to false; the NEON-based subclasses override this to return true.
58 virtual bool usesNeon() { return false; }
// The body is conditional on __ARM_NEON__ being defined at compile time
// (the remaining lines of this method are not visible here).
60 bool isNeonSupported() {
61 #if defined(__ARM_NEON__)
68 // Accessors/mutators.
69 double mb_per_sec() { return _mb_per_sec; }
70 size_t num_warm_loops() { return _num_warm_loops; }
71 size_t num_loops() { return _num_loops; }
72 size_t size() { return _size; }
74 void set_num_warm_loops(size_t num_warm_loops) {
75 _num_warm_loops = num_warm_loops;
77 void set_num_loops(size_t num_loops) { _num_loops = num_loops; }
// Initial values for _num_warm_loops and _num_loops.
80 static const unsigned int DEFAULT_NUM_WARM_LOOPS = 1000000;
81 static const unsigned int DEFAULT_NUM_LOOPS = 20000000;
// The timed kernel; each concrete benchmark supplies its own implementation.
// num_loops is the iteration count passed in by the caller.
84 virtual void bench(size_t num_loops) = 0;
88 size_t _num_warm_loops;
// Conversion factors used when computing _mb_per_sec.
// NOTE(review): in-class initializers on `static const double` members are a
// GNU extension; ISO C++11 and later require `constexpr` here. Confirm the
// toolchain before relying on this.
93 static const double _NUM_NS_PER_SEC = 1000000000.0;
94 static const double _BYTES_PER_MB = 1024.0* 1024.0;
// Base class for the copy benchmarks: works on a source buffer (_src) and a
// destination buffer (_dst), both NULL until setSize() allocates them.
// NOTE(review): parts of setSize(), the destructor and the verification code
// are not visible in this view.
97 class CopyBandwidthBenchmark : public BandwidthBenchmark {
99 CopyBandwidthBenchmark() : BandwidthBenchmark(), _src(NULL), _dst(NULL) { }
// Allocates both buffers with memalign(64, _size) and reports an allocation
// failure through perror().
101 bool setSize(size_t size) {
// Fallback size; the condition selecting it is not visible here (presumably
// used when the caller did not request a size -- TODO confirm).
110 _size = DEFAULT_COPY_SIZE;
115 _src = reinterpret_cast<char*>(memalign(64, _size));
117 perror("Failed to allocate memory for test.");
120 _dst = reinterpret_cast<char*>(memalign(64, _size));
122 perror("Failed to allocate memory for test.");
// Destructor body is not visible; presumably it releases _src and _dst.
128 virtual ~CopyBandwidthBenchmark() {
// Verification, first pass: fill _src with 0x23, zero _dst, then require the
// two buffers to match. The copy between the fill and the compare is
// presumably a bench() call on a line not visible here.
140 memset(_src, 0x23, _size);
141 memset(_dst, 0, _size);
143 if (memcmp(_src, _dst, _size) != 0) {
144 printf("Buffers failed to compare after one loop.\n");
// Verification, second pass: same fill/compare, reported as the two-loop case.
148 memset(_src, 0x23, _size);
149 memset(_dst, 0, _size);
152 if (memcmp(_src, _dst, _size) != 0) {
153 printf("Buffers failed to compare after two loops.\n");
// Buffer size in bytes assigned to _size in setSize() above.
164 static const unsigned int DEFAULT_COPY_SIZE = 8000;
// Copy benchmark whose kernel moves data with paired ldrd/strd instructions.
167 class CopyLdrdStrdBenchmark : public CopyBandwidthBenchmark {
169 CopyLdrdStrdBenchmark() : CopyBandwidthBenchmark() { }
170 virtual ~CopyLdrdStrdBenchmark() {}
172 const char *getName() { return "ldrd/strd"; }
175 // Copy using ldrd/strd instructions.
176 void bench(size_t num_loops) {
// Every register the kernel touches is pushed here and popped at the end.
178 "stmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
// r4 = r2 >> 6, i.e. the number of 64-byte chunks (r2 presumably holds _size;
// the operand setup lines are not visible here).
186 "mov r4, r2, lsr #6\n"
// Unrolled body: eight 8-byte ldrd/strd pairs at offsets 0..56 copy 64 bytes
// from [r0] to [r1].
189 "ldrd r6, r7, [r0]\n"
190 "strd r6, r7, [r1]\n"
191 "ldrd r6, r7, [r0, #8]\n"
192 "strd r6, r7, [r1, #8]\n"
193 "ldrd r6, r7, [r0, #16]\n"
194 "strd r6, r7, [r1, #16]\n"
195 "ldrd r6, r7, [r0, #24]\n"
196 "strd r6, r7, [r1, #24]\n"
197 "ldrd r6, r7, [r0, #32]\n"
198 "strd r6, r7, [r1, #32]\n"
199 "ldrd r6, r7, [r0, #40]\n"
200 "strd r6, r7, [r1, #40]\n"
201 "ldrd r6, r7, [r0, #48]\n"
202 "strd r6, r7, [r1, #48]\n"
203 "ldrd r6, r7, [r0, #56]\n"
204 "strd r6, r7, [r1, #56]\n"
216 "ldmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
// NOTE(review): the clobber list names only r0-r3 and has no "memory" or "cc"
// entry; r4/r6/r7 are preserved by hand via the stmfd/ldmfd pair. Confirm
// this is sufficient for the compiler in use.
217 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
// Copy benchmark whose kernel moves data with ldmia/stmia multi-register
// transfers.
221 class CopyLdmiaStmiaBenchmark : public CopyBandwidthBenchmark {
223 CopyLdmiaStmiaBenchmark() : CopyBandwidthBenchmark() { }
224 virtual ~CopyLdmiaStmiaBenchmark() {}
226 const char *getName() { return "ldmia/stmia"; }
229 // Copy using ldmia/stmia instructions.
230 void bench(size_t num_loops) {
// r0-r12 are pushed here and popped at the end of the kernel.
232 "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
// r4 = r2 >> 6, i.e. the number of 64-byte chunks (r2 presumably holds _size).
240 "mov r4, r2, lsr #6\n"
// Each ldmia/stmia pair moves eight registers (32 bytes) and advances r0/r1
// through writeback; the two pairs together copy 64 bytes.
243 "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
244 "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
246 "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
247 "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
255 "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
// NOTE(review): the clobber list names only r0-r3 and has no "memory" or "cc"
// entry; r4-r12 are preserved by hand via the stmfd/ldmfd pair.
256 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
// Copy benchmark whose kernel moves data with NEON vld1/vst1 instructions.
260 class CopyVld1Vst1Benchmark : public CopyBandwidthBenchmark {
262 CopyVld1Vst1Benchmark() : CopyBandwidthBenchmark() { }
263 virtual ~CopyVld1Vst1Benchmark() {}
265 const char *getName() { return "vld1/vst1"; }
// Marks this benchmark as NEON-dependent so canRun() checks for NEON support.
267 bool usesNeon() { return true; }
270 // Copy using vld1/vst1 instructions.
271 void bench(size_t num_loops) {
// The kernel is only compiled when __ARM_NEON__ is defined.
272 #if defined(__ARM_NEON__)
274 "stmfd sp!, {r0,r1,r2,r3,r4}\n"
// r4 = r2 >> 6, i.e. the number of 64-byte chunks (r2 presumably holds _size).
282 "mov r4, r2, lsr #6\n"
// Two 32-byte loads into d0-d7 with post-increment of r0, followed by two
// 32-byte stores through r1 carrying a 128-bit alignment hint.
285 "vld1.8 {d0-d3}, [r0]!\n"
286 "vld1.8 {d4-d7}, [r0]!\n"
288 "vst1.8 {d0-d3}, [r1:128]!\n"
289 "vst1.8 {d4-d7}, [r1:128]!\n"
297 "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
// NOTE(review): the clobber list names only r0-r3; the NEON registers d0-d7
// written above and "memory"/"cc" are not listed. Confirm this is intended.
298 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
// Copy benchmark whose kernel moves data with vldr/vstr on the d0/d1
// registers.
303 class CopyVldrVstrBenchmark : public CopyBandwidthBenchmark {
305 CopyVldrVstrBenchmark() : CopyBandwidthBenchmark() { }
306 virtual ~CopyVldrVstrBenchmark() {}
308 const char *getName() { return "vldr/vstr"; }
// Marks this benchmark as NEON-dependent so canRun() checks for NEON support.
310 bool usesNeon() { return true; }
313 // Copy using vldr/vstr instructions.
314 void bench(size_t num_loops) {
// The kernel is only compiled when __ARM_NEON__ is defined.
315 #if defined(__ARM_NEON__)
317 "stmfd sp!, {r0,r1,r2,r3,r4}\n"
// r4 = r2 >> 6, i.e. the number of 64-byte chunks (r2 presumably holds _size).
325 "mov r4, r2, lsr #6\n"
// Interleaved body: d0 and d1 alternate so that each 8-byte store is issued
// after the next load; offsets 0..56 cover one 64-byte chunk.
328 "vldr d0, [r0, #0]\n"
330 "vldr d1, [r0, #8]\n"
331 "vstr d0, [r1, #0]\n"
332 "vldr d0, [r0, #16]\n"
333 "vstr d1, [r1, #8]\n"
334 "vldr d1, [r0, #24]\n"
335 "vstr d0, [r1, #16]\n"
336 "vldr d0, [r0, #32]\n"
337 "vstr d1, [r1, #24]\n"
338 "vldr d1, [r0, #40]\n"
339 "vstr d0, [r1, #32]\n"
340 "vldr d0, [r0, #48]\n"
341 "vstr d1, [r1, #40]\n"
342 "vldr d1, [r0, #56]\n"
343 "vstr d0, [r1, #48]\n"
345 "vstr d1, [r1, #56]\n"
354 "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
// NOTE(review): the clobber list names only r0-r3; d0/d1 and "memory"/"cc"
// are not listed. Confirm this is intended.
355 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
// Copy benchmark whose kernel moves data with vldmia/vstmia multi-register
// transfers.
360 class CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark {
362 CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { }
363 virtual ~CopyVldmiaVstmiaBenchmark() {}
365 const char *getName() { return "vldmia/vstmia"; }
// Marks this benchmark as NEON-dependent so canRun() checks for NEON support.
367 bool usesNeon() { return true; }
370 // Copy using vldmia/vstmia instructions.
371 void bench(size_t num_loops) {
// The kernel is only compiled when __ARM_NEON__ is defined.
372 #if defined(__ARM_NEON__)
374 "stmfd sp!, {r0,r1,r2,r3,r4}\n"
// r4 = r2 >> 6, i.e. the number of 64-byte chunks (r2 presumably holds _size).
382 "mov r4, r2, lsr #6\n"
// One load and one store of d0-d7 (64 bytes), each with writeback on its
// base register.
385 "vldmia r0!, {d0-d7}\n"
387 "vstmia r1!, {d0-d7}\n"
395 "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
// NOTE(review): the clobber list names only r0-r3; d0-d7 and "memory"/"cc"
// are not listed. Confirm this is intended.
396 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
// Copy benchmark that uses the C library memcpy() as its kernel.
401 class MemcpyBenchmark : public CopyBandwidthBenchmark {
403 MemcpyBenchmark() : CopyBandwidthBenchmark() { }
404 virtual ~MemcpyBenchmark() {}
406 const char *getName() { return "memcpy"; }
// Copies _size bytes from _src to _dst once per loop iteration.
409 void bench(size_t num_loops) {
410 for (size_t i = 0; i < num_loops; i++) {
411 memcpy(_dst, _src, _size);
// Base class for the benchmarks that operate on a single buffer (_buffer),
// which is NULL until setSize() allocates it.
// NOTE(review): parts of setSize() and the destructor are not visible in
// this view.
416 class SingleBufferBandwidthBenchmark : public BandwidthBenchmark {
418 SingleBufferBandwidthBenchmark() : BandwidthBenchmark(), _buffer(NULL) { }
// Destructor body is not visible; presumably it releases _buffer.
419 virtual ~SingleBufferBandwidthBenchmark() {
// Allocates the buffer with memalign(64, _size), reports an allocation
// failure through perror(), and zero-fills the buffer.
426 bool setSize(size_t size) {
// Fallback size; the condition selecting it is not visible here (presumably
// used when the caller did not request a size -- TODO confirm).
433 _size = DEFAULT_SINGLE_BUFFER_SIZE;
438 _buffer = reinterpret_cast<char*>(memalign(64, _size));
440 perror("Failed to allocate memory for test.");
443 memset(_buffer, 0, _size);
// Default verification always succeeds; the classes in this header that
// derive directly from this one declare no verify() of their own.
448 bool verify() { return true; }
// Buffer size in bytes assigned to _size in setSize() above.
453 static const unsigned int DEFAULT_SINGLE_BUFFER_SIZE = 16000;
// Base class for the write benchmarks. The visible code checks the buffer
// contents after running the kernel once and then twice.
// NOTE(review): the header of the method containing the checks below
// (presumably verify()) and the bench() calls between the memset and each
// loop are not visible in this view.
456 class WriteBandwidthBenchmark : public SingleBufferBandwidthBenchmark {
458 WriteBandwidthBenchmark() : SingleBufferBandwidthBenchmark() { }
459 virtual ~WriteBandwidthBenchmark() { }
// First check: starting from a zeroed buffer, every byte must equal 1.
462 memset(_buffer, 0, _size);
464 for (size_t i = 0; i < _size; i++) {
465 if (_buffer[i] != 1) {
466 printf("Buffer failed to compare after one loop.\n");
// Second check: starting from a zeroed buffer, every byte must equal 2.
471 memset(_buffer, 0, _size);
473 for (size_t i = 0; i < _size; i++) {
474 if (_buffer[i] != 2) {
475 printf("Buffer failed to compare after two loops.\n");
// Write benchmark whose kernel stores a byte pattern with strd.
484 class WriteStrdBenchmark : public WriteBandwidthBenchmark {
486 WriteStrdBenchmark() : WriteBandwidthBenchmark() { }
487 virtual ~WriteStrdBenchmark() {}
489 const char *getName() { return "strd"; }
492 // Write a given value using strd.
493 void bench(size_t num_loops) {
// r0-r5 are pushed here and popped at the end of the kernel.
495 "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
// r3 = r1 >> 5, i.e. the number of 32-byte chunks (r1 presumably holds _size;
// the operand setup lines are not visible here).
505 "mov r3, r1, lsr #5\n"
// Adds 1 to each of the four bytes in r4 (barring a carry between bytes).
507 "add r4, r4, #0x01010101\n"
// Four 8-byte stores at offsets 0..24 write one 32-byte chunk.
512 "strd r4, r5, [r0]\n"
513 "strd r4, r5, [r0, #8]\n"
514 "strd r4, r5, [r0, #16]\n"
515 "strd r4, r5, [r0, #24]\n"
523 "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
// NOTE(review): the clobber list names only r0-r2 and has no "memory" or "cc"
// entry; r3-r5 are preserved by hand via the stmfd/ldmfd pair.
524 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Write benchmark whose kernel stores a byte pattern with stmia.
528 class WriteStmiaBenchmark : public WriteBandwidthBenchmark {
530 WriteStmiaBenchmark() : WriteBandwidthBenchmark() { }
531 virtual ~WriteStmiaBenchmark() {}
533 const char *getName() { return "stmia"; }
536 // Write a given value using stmia.
537 void bench(size_t num_loops) {
// r0-r11 are pushed here and popped at the end of the kernel.
539 "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
// r3 = r1 >> 5, i.e. the number of 32-byte chunks (r1 presumably holds _size).
548 "mov r3, r1, lsr #5\n"
// Adds 1 to each of the four bytes in r4 (barring a carry between bytes).
550 "add r4, r4, #0x01010101\n"
// Stores eight registers (32 bytes) and advances r0 through writeback.
561 "stmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
568 "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
// NOTE(review): the clobber list names only r0-r2 and has no "memory" or "cc"
// entry; r3-r11 are preserved by hand via the stmfd/ldmfd pair.
569 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Write benchmark whose kernel stores with the NEON vst1 instruction.
573 class WriteVst1Benchmark : public WriteBandwidthBenchmark {
575 WriteVst1Benchmark() : WriteBandwidthBenchmark() { }
576 virtual ~WriteVst1Benchmark() {}
578 const char *getName() { return "vst1"; }
// Marks this benchmark as NEON-dependent so canRun() checks for NEON support.
580 bool usesNeon() { return true; }
583 // Write a given value using vst.
584 void bench(size_t num_loops) {
// The kernel is only compiled when __ARM_NEON__ is defined.
585 #if defined(__ARM_NEON__)
587 "stmfd sp!, {r0,r1,r2,r3,r4}\n"
// r3 = r1 >> 5, i.e. the number of 32-byte chunks (r1 presumably holds _size).
595 "mov r3, r1, lsr #5\n"
// Stores d0-d3 (32 bytes) with a 128-bit alignment hint and post-increments r0.
605 "vst1.8 {d0-d3}, [r0:128]!\n"
612 "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
// NOTE(review): the clobber list names only r0-r2; "memory"/"cc" and any
// NEON registers set up on lines not visible here are not listed.
613 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Write benchmark whose kernel stores with vstr on the d0/d1 registers.
618 class WriteVstrBenchmark : public WriteBandwidthBenchmark {
620 WriteVstrBenchmark() : WriteBandwidthBenchmark() { }
621 virtual ~WriteVstrBenchmark() {}
623 const char *getName() { return "vstr"; }
// Marks this benchmark as NEON-dependent so canRun() checks for NEON support.
625 bool usesNeon() { return true; }
628 // Write a given value using vstr.
629 void bench(size_t num_loops) {
// The kernel is only compiled when __ARM_NEON__ is defined.
630 #if defined(__ARM_NEON__)
632 "stmfd sp!, {r0,r1,r2,r3,r4}\n"
// r3 = r1 >> 5, i.e. the number of 32-byte chunks (r1 presumably holds _size).
640 "mov r3, r1, lsr #5\n"
// Four 8-byte stores at offsets 0..24 write one 32-byte chunk.
649 "vstr d0, [r0, #0]\n"
651 "vstr d1, [r0, #8]\n"
652 "vstr d0, [r0, #16]\n"
653 "vstr d1, [r0, #24]\n"
661 "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
// NOTE(review): the clobber list names only r0-r2; "memory"/"cc" and any
// NEON registers set up on lines not visible here are not listed.
662 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Write benchmark whose kernel stores with the vstmia instruction.
667 class WriteVstmiaBenchmark : public WriteBandwidthBenchmark {
669 WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { }
670 virtual ~WriteVstmiaBenchmark() {}
672 const char *getName() { return "vstmia"; }
// Marks this benchmark as NEON-dependent so canRun() checks for NEON support.
674 bool usesNeon() { return true; }
677 // Write a given value using vstmia.
678 void bench(size_t num_loops) {
// The kernel is only compiled when __ARM_NEON__ is defined.
679 #if defined(__ARM_NEON__)
681 "stmfd sp!, {r0,r1,r2,r3,r4}\n"
// r3 = r1 >> 5, i.e. the number of 32-byte chunks (r1 presumably holds _size).
689 "mov r3, r1, lsr #5\n"
// Stores d0-d3 (32 bytes) and advances r0 through writeback.
699 "vstmia r0!, {d0-d3}\n"
706 "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
// NOTE(review): the clobber list names only r0-r2; "memory"/"cc" and any
// NEON registers set up on lines not visible here are not listed.
707 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Write benchmark that uses the C library memset() as its kernel.
712 class MemsetBenchmark : public WriteBandwidthBenchmark {
714 MemsetBenchmark() : WriteBandwidthBenchmark() { }
715 virtual ~MemsetBenchmark() {}
717 const char *getName() { return "memset"; }
// Fills the buffer once per iteration. The fill value (i % 255) + 1 cycles
// through 1..255 and is never 0, so one iteration leaves 1 and two
// iterations leave 2, matching the checks in WriteBandwidthBenchmark.
720 void bench(size_t num_loops) {
721 for (size_t i = 0; i < num_loops; i++) {
722 memset(_buffer, (i % 255) + 1, _size);
// Read benchmark whose kernel loads from the buffer with ldrd.
727 class ReadLdrdBenchmark : public SingleBufferBandwidthBenchmark {
729 ReadLdrdBenchmark() : SingleBufferBandwidthBenchmark() { }
730 virtual ~ReadLdrdBenchmark() {}
732 const char *getName() { return "ldrd"; }
735 // Read from memory using ldrd.
736 void bench(size_t num_loops) {
// r0-r5 are pushed here and popped at the end of the kernel.
738 "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
// r3 = r1 >> 5, i.e. the number of 32-byte chunks (r1 presumably holds _size;
// the operand setup lines are not visible here).
745 "mov r3, r1, lsr #5\n"
// Four 8-byte loads at offsets 0..24 read one 32-byte chunk; the loaded
// values are overwritten by the next load.
749 "ldrd r4, r5, [r0]\n"
750 "ldrd r4, r5, [r0, #8]\n"
751 "ldrd r4, r5, [r0, #16]\n"
752 "ldrd r4, r5, [r0, #24]\n"
760 "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
// NOTE(review): the clobber list names only r0-r2 and has no "memory" or "cc"
// entry; r3-r5 are preserved by hand via the stmfd/ldmfd pair.
761 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Read benchmark whose kernel loads from the buffer with ldmia.
765 class ReadLdmiaBenchmark : public SingleBufferBandwidthBenchmark {
767 ReadLdmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
768 virtual ~ReadLdmiaBenchmark() {}
770 const char *getName() { return "ldmia"; }
773 // Read from memory using ldmia.
774 void bench(size_t num_loops) {
// r0-r11 are pushed here and popped at the end of the kernel.
776 "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
// r3 = r1 >> 5, i.e. the number of 32-byte chunks (r1 presumably holds _size).
783 "mov r3, r1, lsr #5\n"
// Loads eight registers (32 bytes) and advances r0 through writeback.
787 "ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
794 "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
// NOTE(review): the clobber list names only r0-r2 and has no "memory" or "cc"
// entry; r3-r11 are preserved by hand via the stmfd/ldmfd pair.
795 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Read benchmark whose kernel loads from the buffer with the NEON vld1
// instruction.
799 class ReadVld1Benchmark : public SingleBufferBandwidthBenchmark {
801 ReadVld1Benchmark() : SingleBufferBandwidthBenchmark() { }
802 virtual ~ReadVld1Benchmark() {}
804 const char *getName() { return "vld1"; }
// Marks this benchmark as NEON-dependent so canRun() checks for NEON support.
806 bool usesNeon() { return true; }
809 // Read from memory using vld1.
810 void bench(size_t num_loops) {
// The kernel is only compiled when __ARM_NEON__ is defined.
811 #if defined(__ARM_NEON__)
813 "stmfd sp!, {r0,r1,r2,r3}\n"
// r3 = r1 >> 5, i.e. the number of 32-byte chunks (r1 presumably holds _size).
820 "mov r3, r1, lsr #5\n"
// Loads 32 bytes into d0-d3 with a 128-bit alignment hint and post-increments
// r0.
824 "vld1.8 {d0-d3}, [r0:128]!\n"
831 "ldmfd sp!, {r0,r1,r2,r3}\n"
// NOTE(review): the clobber list names only r0-r2; d0-d3 and "memory"/"cc"
// are not listed. Confirm this is intended.
832 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Read benchmark whose kernel loads from the buffer with vldr into d0/d1.
837 class ReadVldrBenchmark : public SingleBufferBandwidthBenchmark {
839 ReadVldrBenchmark() : SingleBufferBandwidthBenchmark() { }
840 virtual ~ReadVldrBenchmark() {}
842 const char *getName() { return "vldr"; }
// Marks this benchmark as NEON-dependent so canRun() checks for NEON support.
844 bool usesNeon() { return true; }
847 // Read from memory using vldr.
848 void bench(size_t num_loops) {
// The kernel is only compiled when __ARM_NEON__ is defined.
849 #if defined(__ARM_NEON__)
851 "stmfd sp!, {r0,r1,r2,r3}\n"
// r3 = r1 >> 5, i.e. the number of 32-byte chunks (r1 presumably holds _size).
858 "mov r3, r1, lsr #5\n"
// Four 8-byte loads at offsets 0..24 read one 32-byte chunk.
861 "vldr d0, [r0, #0]\n"
863 "vldr d1, [r0, #8]\n"
864 "vldr d0, [r0, #16]\n"
865 "vldr d1, [r0, #24]\n"
873 "ldmfd sp!, {r0,r1,r2,r3}\n"
// NOTE(review): the clobber list names only r0-r2; d0/d1 and "memory"/"cc"
// are not listed. Confirm this is intended.
874 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Read benchmark whose kernel loads from the buffer with vldmia.
880 class ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark {
882 ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
883 virtual ~ReadVldmiaBenchmark() {}
885 const char *getName() { return "vldmia"; }
// Marks this benchmark as NEON-dependent so canRun() checks for NEON support.
887 bool usesNeon() { return true; }
890 // Read from memory using vldmia.
891 void bench(size_t num_loops) {
// The kernel is only compiled when __ARM_NEON__ is defined.
892 #if defined(__ARM_NEON__)
894 "stmfd sp!, {r0,r1,r2,r3}\n"
// r3 = r1 >> 5, i.e. the number of 32-byte chunks (r1 presumably holds _size).
901 "mov r3, r1, lsr #5\n"
// Loads d0-d3 (32 bytes) and advances r0 through writeback.
905 "vldmia r0!, {d0-d3}\n"
912 "ldmfd sp!, {r0,r1,r2,r3}\n"
// NOTE(review): the clobber list names only r0-r2; d0-d3 and "memory"/"cc"
// are not listed. Confirm this is intended.
913 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
918 #endif // __BANDWIDTH_H__