2 * Copyright (C) 2013 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #ifndef __BANDWIDTH_H__
18 #define __BANDWIDTH_H__
22 // Bandwidth Class definitions.
// Abstract base class for the memory-bandwidth benchmarks declared below.
// Subclasses provide setSize(), getName(), verify() and the measured bench() loop.
23 class BandwidthBenchmark {
// Loop counts start from the DEFAULT_* constants declared later in this class.
27 _num_warm_loops(DEFAULT_NUM_WARM_LOOPS),
28 _num_loops(DEFAULT_NUM_LOOPS) {}
29 virtual ~BandwidthBenchmark() {}
// Warm-up pass: bench() runs with the warm-up loop count before any timestamp is taken.
39 bench(_num_warm_loops);
// Timed region: t ends up holding the difference between two system_time() readings.
// Presumably the measured bench(_num_loops) call sits between them; that line is
// not visible in this view -- TODO confirm.
41 nsecs_t t = system_time();
43 t = system_time() - t;
// Throughput = (bytes per loop * loops / bytes per MB) / (elapsed / ns per second).
// Dividing by the double constants keeps both quotients in floating point.
45 _mb_per_sec = (_size*(_num_loops/_BYTES_PER_MB))/(t/_NUM_NS_PER_SEC);
// A benchmark can run unless it needs NEON and NEON is not supported.
50 bool canRun() { return !usesNeon() || isNeonSupported(); }
// Configures the benchmark for the requested size; implemented by subclasses.
52 virtual bool setSize(size_t size) = 0;
// Returns the benchmark's display name (subclasses return string literals).
54 virtual const char *getName() = 0;
// Self-check hook implemented by subclasses.
56 virtual bool verify() = 0;
// Defaults to false; NEON-based subclasses in this file override it to return true.
58 virtual bool usesNeon() { return false; }
// The result is selected at compile time by the __ARM_NEON__ macro
// (the two branch bodies are not visible in this view).
60 bool isNeonSupported() {
61 #if defined(__ARM_NEON__)
68 // Accessors/mutators.
69 double mb_per_sec() { return _mb_per_sec; }
70 size_t num_warm_loops() { return _num_warm_loops; }
71 size_t num_loops() { return _num_loops; }
72 size_t size() { return _size; }
74 void set_num_warm_loops(size_t num_warm_loops) {
75 _num_warm_loops = num_warm_loops;
77 void set_num_loops(size_t num_loops) { _num_loops = num_loops; }
// Default iteration counts used by the constructor's initializer list.
80 static const unsigned int DEFAULT_NUM_WARM_LOOPS = 1000000;
81 static const unsigned int DEFAULT_NUM_LOOPS = 20000000;
// The measured kernel: subclasses perform num_loops passes over their buffer(s).
84 virtual void bench(size_t num_loops) = 0;
88 size_t _num_warm_loops;
// Conversion constants for the throughput formula; a "MB" here is 1024*1024 bytes.
// NOTE(review): an in-class initializer on a `static const double` member is a
// compiler extension; standard C++11 and later require `constexpr` for this -- confirm
// the toolchain in use accepts it.
93 static const double _NUM_NS_PER_SEC = 1000000000.0;
94 static const double _BYTES_PER_MB = 1024.0* 1024.0;
// Base class for the copy benchmarks: owns a source buffer (_src) and a
// destination buffer (_dst), both of _size bytes.
97 class CopyBandwidthBenchmark : public BandwidthBenchmark {
// Both buffer pointers start out NULL.
99 CopyBandwidthBenchmark() : BandwidthBenchmark(), _src(NULL), _dst(NULL) { }
// Chooses the copy size and allocates both buffers.
101 bool setSize(size_t size) {
// Fallback size; the condition selecting it is not visible in this view.
110 _size = DEFAULT_COPY_SIZE;
// Buffers are requested from memalign() with a 64-byte alignment.
115 _src = reinterpret_cast<char*>(memalign(64, _size));
// Allocation-failure report via perror() (the guarding check is not visible here).
117 perror("Failed to allocate memory for test.");
120 _dst = reinterpret_cast<char*>(memalign(64, _size));
122 perror("Failed to allocate memory for test.");
128 virtual ~CopyBandwidthBenchmark() {
// Verification sequence (presumably the body of verify(); its signature line is not
// visible): fill the source with 0x23, clear the destination, then require the two
// buffers to be byte-identical. Presumably a bench() call runs between the fills and
// each comparison -- TODO confirm.
140 memset(_src, 0x23, _size);
141 memset(_dst, 0, _size);
143 if (memcmp(_src, _dst, _size) != 0) {
144 printf("Strings failed to compare after one loop.\n");
// Second round of the same check, reported as the "two loops" case.
148 memset(_src, 0x23, _size);
149 memset(_dst, 0, _size);
152 if (memcmp(_src, _dst, _size) != 0) {
153 printf("Strings failed to compare after two loops.\n");
// Default copy size in bytes.
164 static const unsigned int DEFAULT_COPY_SIZE = 8000;
// Copy benchmark whose inner loop moves data with ARM ldrd/strd (64-bit load/store) pairs.
167 class CopyLdrdStrdBenchmark : public CopyBandwidthBenchmark {
169 CopyLdrdStrdBenchmark() : CopyBandwidthBenchmark() { }
170 virtual ~CopyLdrdStrdBenchmark() {}
172 const char *getName() { return "ldrd/strd"; }
175 // Copy using ldrd/strd instructions.
176 void bench(size_t num_loops) {
// Every register the assembly touches is pushed on entry and popped on exit.
178 "stmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
// r4 = r2 >> 6: a count of 64-byte chunks (r2 presumably holds _size; the register
// setup lines are not visible in this view -- TODO confirm).
186 "mov r4, r2, lsr #6\n"
// One chunk: eight 8-byte transfers, loaded from [r0 + off] and stored to [r1 + off]
// for off = 0, 8, ..., 56.
189 "ldrd r6, r7, [r0]\n"
190 "strd r6, r7, [r1]\n"
191 "ldrd r6, r7, [r0, #8]\n"
192 "strd r6, r7, [r1, #8]\n"
193 "ldrd r6, r7, [r0, #16]\n"
194 "strd r6, r7, [r1, #16]\n"
195 "ldrd r6, r7, [r0, #24]\n"
196 "strd r6, r7, [r1, #24]\n"
197 "ldrd r6, r7, [r0, #32]\n"
198 "strd r6, r7, [r1, #32]\n"
199 "ldrd r6, r7, [r0, #40]\n"
200 "strd r6, r7, [r1, #40]\n"
201 "ldrd r6, r7, [r0, #48]\n"
202 "strd r6, r7, [r1, #48]\n"
203 "ldrd r6, r7, [r0, #56]\n"
204 "strd r6, r7, [r1, #56]\n"
216 "ldmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
// Inputs: _src, _dst, _size, num_loops. Declared clobbers: r0-r3 only.
// NOTE(review): r4/r6/r7 are preserved by the push/pop above rather than listed as
// clobbers, and there is no "memory" clobber although the strd instructions write
// through r1 -- confirm this is safe with the compiler in use.
217 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
// Copy benchmark whose inner loop moves data with ARM ldmia/stmia (load/store multiple).
221 class CopyLdmiaStmiaBenchmark : public CopyBandwidthBenchmark {
223 CopyLdmiaStmiaBenchmark() : CopyBandwidthBenchmark() { }
224 virtual ~CopyLdmiaStmiaBenchmark() {}
226 const char *getName() { return "ldmia/stmia"; }
229 // Copy using ldmia/stmia instructions.
230 void bench(size_t num_loops) {
// r0-r12 are pushed on entry and popped on exit.
232 "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
// r4 = r2 >> 6: a count of 64-byte chunks (r2 presumably holds _size; the register
// setup lines are not visible in this view -- TODO confirm).
240 "mov r4, r2, lsr #6\n"
// One chunk: two load/store-multiple pairs of eight registers (32 bytes) each, with
// writeback (the `!`) advancing r0 and r1 past the data just moved.
243 "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
244 "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
246 "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
247 "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
255 "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
// Inputs: _src, _dst, _size, num_loops. Declared clobbers: r0-r3 only.
// NOTE(review): r4-r12 are preserved by the push/pop above rather than listed as
// clobbers, and there is no "memory" clobber although stmia writes through r1 --
// confirm this is safe with the compiler in use.
256 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
// Copy benchmark whose inner loop moves data with NEON vld1/vst1 instructions.
// NOTE(review): unlike WriteVstBenchmark and ReadVldBenchmark in this file, no
// usesNeon() override and no __ARM_NEON__ guard is visible for this class even though
// bench() issues NEON instructions -- confirm whether one is needed.
260 class CopyVldVstBenchmark : public CopyBandwidthBenchmark {
262 CopyVldVstBenchmark() : CopyBandwidthBenchmark() { }
263 virtual ~CopyVldVstBenchmark() {}
265 const char *getName() { return "vld/vst"; }
268 // Copy using vld/vst instructions.
269 void bench(size_t num_loops) {
// r0-r4 are pushed on entry and popped on exit.
271 "stmfd sp!, {r0,r1,r2,r3,r4}\n"
// r4 = r2 >> 6: a count of 64-byte chunks (r2 presumably holds _size; the register
// setup lines are not visible in this view -- TODO confirm).
279 "mov r4, r2, lsr #6\n"
// One chunk: two 32-byte loads into d0-d3 and d4-d7 from [r0], post-incremented.
282 "vld1.8 {d0-d3}, [r0]!\n"
283 "vld1.8 {d4-d7}, [r0]!\n"
// The stores carry a :128 alignment qualifier on r1; the loads above carry none.
285 "vst1.8 {d0-d3}, [r1:128]!\n"
286 "vst1.8 {d4-d7}, [r1:128]!\n"
294 "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
// Inputs: _src, _dst, _size, num_loops. Declared clobbers: r0-r3 only.
// NOTE(review): d0-d7 are overwritten but not listed as clobbers, and there is no
// "memory" clobber although vst1 writes through r1 -- confirm this is safe.
295 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
// Copy benchmark whose inner loop moves data with vldmia/vstmia (multi-register load/store).
// NOTE(review): no usesNeon() override and no __ARM_NEON__ guard is visible for this
// class, unlike WriteVstmiaBenchmark and ReadVldmiaBenchmark in this file -- confirm
// whether one is needed.
299 class CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark {
301 CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { }
302 virtual ~CopyVldmiaVstmiaBenchmark() {}
304 const char *getName() { return "vldmia/vstmia"; }
307 // Copy using vldmia/vstmia instructions.
308 void bench(size_t num_loops) {
// r0-r4 are pushed on entry and popped on exit.
310 "stmfd sp!, {r0,r1,r2,r3,r4}\n"
// r4 = r2 >> 6: a count of 64-byte chunks (r2 presumably holds _size; the register
// setup lines are not visible in this view -- TODO confirm).
318 "mov r4, r2, lsr #6\n"
// One chunk: load d0-d7 (64 bytes) from [r0] and store them to [r1], both with writeback.
321 "vldmia r0!, {d0-d7}\n"
323 "vstmia r1!, {d0-d7}\n"
331 "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
// Inputs: _src, _dst, _size, num_loops. Declared clobbers: r0-r3 only.
// NOTE(review): d0-d7 are overwritten but not listed as clobbers, and there is no
// "memory" clobber although vstmia writes through r1 -- confirm this is safe.
332 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
// Copy benchmark that measures the C library's memcpy().
336 class MemcpyBenchmark : public CopyBandwidthBenchmark {
338 MemcpyBenchmark() : CopyBandwidthBenchmark() { }
339 virtual ~MemcpyBenchmark() {}
341 const char *getName() { return "memcpy"; }
// Performs num_loops whole-buffer copies of _size bytes from _src to _dst.
344 void bench(size_t num_loops) {
345 for (size_t i = 0; i < num_loops; i++) {
346 memcpy(_dst, _src, _size);
// Base class for the benchmarks that work on one buffer (_buffer) of _size bytes;
// the read and write benchmarks below derive from it.
351 class SingleBufferBandwidthBenchmark : public BandwidthBenchmark {
// The buffer pointer starts out NULL.
353 SingleBufferBandwidthBenchmark() : BandwidthBenchmark(), _buffer(NULL) { }
354 virtual ~SingleBufferBandwidthBenchmark() {
// Chooses the buffer size, allocates the buffer and zero-fills it.
361 bool setSize(size_t size) {
// Fallback size; the condition selecting it is not visible in this view.
368 _size = DEFAULT_SINGLE_BUFFER_SIZE;
// The buffer is requested from memalign() with a 64-byte alignment.
373 _buffer = reinterpret_cast<char*>(memalign(64, _size));
// Allocation-failure report via perror() (the guarding check is not visible here).
375 perror("Failed to allocate memory for test.");
378 memset(_buffer, 0, _size);
// Default verification always succeeds; WriteBandwidthBenchmark provides a real check.
383 bool verify() { return true; }
// Default buffer size in bytes.
388 static const unsigned int DEFAULT_SINGLE_BUFFER_SIZE = 16000;
// Base class for the write benchmarks; adds a verification pass over the written buffer.
391 class WriteBandwidthBenchmark : public SingleBufferBandwidthBenchmark {
393 WriteBandwidthBenchmark() : SingleBufferBandwidthBenchmark() { }
394 virtual ~WriteBandwidthBenchmark() { }
// Verification sequence (presumably the body of verify(); its signature line is not
// visible): clear the buffer, then require every byte to equal 1. Presumably a
// one-loop bench() call runs between the clear and the scan -- TODO confirm.
397 memset(_buffer, 0, _size);
399 for (size_t i = 0; i < _size; i++) {
400 if (_buffer[i] != 1) {
401 printf("Strings failed to compare after one loop.\n");
// Second round: clear again, then require every byte to equal 2 (the "two loops" case).
406 memset(_buffer, 0, _size);
408 for (size_t i = 0; i < _size; i++) {
409 if (_buffer[i] != 2) {
410 printf("Strings failed to compare after two loops.\n");
// Write benchmark whose inner loop stores data with ARM strd (64-bit store).
419 class WriteStrdBenchmark : public WriteBandwidthBenchmark {
421 WriteStrdBenchmark() : WriteBandwidthBenchmark() { }
422 virtual ~WriteStrdBenchmark() {}
424 const char *getName() { return "strd"; }
427 // Write a given value using strd.
428 void bench(size_t num_loops) {
// r0-r5 are pushed on entry and popped on exit.
430 "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
// r3 = r1 >> 5: a count of 32-byte chunks (r1 presumably holds _size; the register
// setup lines are not visible in this view -- TODO confirm).
440 "mov r3, r1, lsr #5\n"
// Adds 0x01010101 to r4, i.e. bumps each byte of the 32-bit fill word by one.
// Presumably done once per outer loop so that after N loops every byte is N, which is
// what WriteBandwidthBenchmark's check expects (1, then 2) -- TODO confirm.
442 "add r4, r4, #0x01010101\n"
// One chunk: four 8-byte stores of r4:r5 to [r0 + 0/8/16/24].
447 "strd r4, r5, [r0]\n"
448 "strd r4, r5, [r0, #8]\n"
449 "strd r4, r5, [r0, #16]\n"
450 "strd r4, r5, [r0, #24]\n"
458 "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
// Inputs: _buffer, _size, num_loops. Declared clobbers: r0-r2 only.
// NOTE(review): r3-r5 are preserved by the push/pop above rather than listed as
// clobbers, and there is no "memory" clobber although strd writes through r0 --
// confirm this is safe with the compiler in use.
459 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Write benchmark whose inner loop stores data with ARM stmia (store multiple).
463 class WriteStmiaBenchmark : public WriteBandwidthBenchmark {
465 WriteStmiaBenchmark() : WriteBandwidthBenchmark() { }
466 virtual ~WriteStmiaBenchmark() {}
468 const char *getName() { return "stmia"; }
471 // Write a given value using stmia.
472 void bench(size_t num_loops) {
// r0-r11 are pushed on entry and popped on exit.
474 "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
// r3 = r1 >> 5: a count of 32-byte chunks (r1 presumably holds _size; the register
// setup lines are not visible in this view -- TODO confirm).
483 "mov r3, r1, lsr #5\n"
// Adds 0x01010101 to r4, i.e. bumps each byte of the 32-bit fill word by one.
// Presumably r5-r11 are then set from r4 (those lines are not visible) -- TODO confirm.
485 "add r4, r4, #0x01010101\n"
// One chunk: a single store-multiple of eight registers (32 bytes) with writeback on r0.
496 "stmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
503 "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
// Inputs: _buffer, _size, num_loops. Declared clobbers: r0-r2 only.
// NOTE(review): r3-r11 are preserved by the push/pop above rather than listed as
// clobbers, and there is no "memory" clobber although stmia writes through r0 --
// confirm this is safe with the compiler in use.
504 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Write benchmark whose inner loop stores data with NEON vst1.
508 class WriteVstBenchmark : public WriteBandwidthBenchmark {
510 WriteVstBenchmark() : WriteBandwidthBenchmark() { }
511 virtual ~WriteVstBenchmark() {}
513 const char *getName() { return "vst"; }
// Reports the NEON requirement so that the base class's canRun() can take it into account.
515 bool usesNeon() { return true; }
518 // Write a given value using vst.
519 void bench(size_t num_loops) {
// The assembly is only compiled when the __ARM_NEON__ macro is defined.
520 #if defined(__ARM_NEON__)
// r0-r4 are pushed on entry and popped on exit.
522 "stmfd sp!, {r0,r1,r2,r3,r4}\n"
// r3 = r1 >> 5: a count of 32-byte chunks (r1 presumably holds _size; the register
// setup lines are not visible in this view -- TODO confirm).
530 "mov r3, r1, lsr #5\n"
// One chunk: a 32-byte store of d0-d3 to [r0] with a :128 alignment qualifier, post-incremented.
540 "vst1.8 {d0-d3}, [r0:128]!\n"
547 "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
// Inputs: _buffer, _size, num_loops. Declared clobbers: r0-r2 only.
// NOTE(review): no NEON registers and no "memory" entry appear in the clobber list
// although vst1 writes through r0 -- confirm this is safe with the compiler in use.
548 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Write benchmark whose inner loop stores data with vstmia (multi-register store).
553 class WriteVstmiaBenchmark : public WriteBandwidthBenchmark {
555 WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { }
556 virtual ~WriteVstmiaBenchmark() {}
558 const char *getName() { return "vstmia"; }
// Reports the NEON requirement so that the base class's canRun() can take it into account.
560 bool usesNeon() { return true; }
563 // Write a given value using vstmia.
564 void bench(size_t num_loops) {
// The assembly is only compiled when the __ARM_NEON__ macro is defined.
565 #if defined(__ARM_NEON__)
// r0-r4 are pushed on entry and popped on exit.
567 "stmfd sp!, {r0,r1,r2,r3,r4}\n"
// r3 = r1 >> 5: a count of 32-byte chunks (r1 presumably holds _size; the register
// setup lines are not visible in this view -- TODO confirm).
575 "mov r3, r1, lsr #5\n"
// One chunk: store d0-d3 (32 bytes) to [r0] with writeback.
585 "vstmia r0!, {d0-d3}\n"
592 "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
// Inputs: _buffer, _size, num_loops. Declared clobbers: r0-r2 only.
// NOTE(review): no NEON registers and no "memory" entry appear in the clobber list
// although vstmia writes through r0 -- confirm this is safe with the compiler in use.
593 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Write benchmark that measures the C library's memset().
598 class MemsetBenchmark : public WriteBandwidthBenchmark {
600 MemsetBenchmark() : WriteBandwidthBenchmark() { }
601 virtual ~MemsetBenchmark() {}
603 const char *getName() { return "memset"; }
// Fills the whole buffer once per loop. The fill byte is (i % 255) + 1, so it cycles
// through 1..255 and is never 0; loop 0 writes 1 and loop 1 writes 2, matching the
// values WriteBandwidthBenchmark's check looks for.
606 void bench(size_t num_loops) {
607 for (size_t i = 0; i < num_loops; i++) {
608 memset(_buffer, (i % 255) + 1, _size);
// Read benchmark whose inner loop loads data with ARM ldrd (64-bit load).
613 class ReadLdrdBenchmark : public SingleBufferBandwidthBenchmark {
615 ReadLdrdBenchmark() : SingleBufferBandwidthBenchmark() { }
616 virtual ~ReadLdrdBenchmark() {}
618 const char *getName() { return "ldrd"; }
621 // Read the buffer using ldrd.
622 void bench(size_t num_loops) {
// r0-r5 are pushed on entry and popped on exit.
624 "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
// r3 = r1 >> 5: a count of 32-byte chunks (r1 presumably holds _size; the register
// setup lines are not visible in this view -- TODO confirm).
631 "mov r3, r1, lsr #5\n"
// One chunk: four 8-byte loads from [r0 + 0/8/16/24]. Every load targets r4:r5, so
// each loaded value is overwritten by the next one.
635 "ldrd r4, r5, [r0]\n"
636 "ldrd r4, r5, [r0, #8]\n"
637 "ldrd r4, r5, [r0, #16]\n"
638 "ldrd r4, r5, [r0, #24]\n"
646 "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
// Inputs: _buffer, _size, num_loops. Declared clobbers: r0-r2 only; r3-r5 are
// preserved by the push/pop above rather than listed as clobbers.
647 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Read benchmark whose inner loop loads data with ARM ldmia (load multiple).
651 class ReadLdmiaBenchmark : public SingleBufferBandwidthBenchmark {
653 ReadLdmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
654 virtual ~ReadLdmiaBenchmark() {}
656 const char *getName() { return "ldmia"; }
659 // Read the buffer using ldmia.
660 void bench(size_t num_loops) {
// r0-r11 are pushed on entry and popped on exit.
662 "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
// r3 = r1 >> 5: a count of 32-byte chunks (r1 presumably holds _size; the register
// setup lines are not visible in this view -- TODO confirm).
669 "mov r3, r1, lsr #5\n"
// One chunk: a single load-multiple of eight registers (32 bytes) with writeback on r0.
673 "ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
680 "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
// Inputs: _buffer, _size, num_loops. Declared clobbers: r0-r2 only; r3-r11 are
// preserved by the push/pop above rather than listed as clobbers.
681 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Read benchmark whose inner loop loads data with NEON vld1.
685 class ReadVldBenchmark : public SingleBufferBandwidthBenchmark {
687 ReadVldBenchmark() : SingleBufferBandwidthBenchmark() { }
688 virtual ~ReadVldBenchmark() {}
690 const char *getName() { return "vld"; }
// Reports the NEON requirement so that the base class's canRun() can take it into account.
692 bool usesNeon() { return true; }
695 // Read the buffer using vld.
696 void bench(size_t num_loops) {
// The assembly is only compiled when the __ARM_NEON__ macro is defined.
697 #if defined(__ARM_NEON__)
// r0-r3 are pushed on entry and popped on exit.
699 "stmfd sp!, {r0,r1,r2,r3}\n"
// r3 = r1 >> 5: a count of 32-byte chunks (r1 presumably holds _size; the register
// setup lines are not visible in this view -- TODO confirm).
706 "mov r3, r1, lsr #5\n"
// One chunk: a 32-byte load into d0-d3 from [r0] with a :128 alignment qualifier, post-incremented.
710 "vld1.8 {d0-d3}, [r0:128]!\n"
717 "ldmfd sp!, {r0,r1,r2,r3}\n"
// Inputs: _buffer, _size, num_loops. Declared clobbers: r0-r2 only.
// NOTE(review): d0-d3 are overwritten by the loads but not listed as clobbers --
// confirm this is safe with the compiler in use.
718 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
// Read benchmark whose inner loop loads data with vldmia (multi-register load).
723 class ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark {
725 ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
726 virtual ~ReadVldmiaBenchmark() {}
728 const char *getName() { return "vldmia"; }
// Reports the NEON requirement so that the base class's canRun() can take it into account.
730 bool usesNeon() { return true; }
733 // Read the buffer using vldmia.
734 void bench(size_t num_loops) {
// The assembly is only compiled when the __ARM_NEON__ macro is defined.
735 #if defined(__ARM_NEON__)
// r0-r3 are pushed on entry and popped on exit.
737 "stmfd sp!, {r0,r1,r2,r3}\n"
// r3 = r1 >> 5: a count of 32-byte chunks (r1 presumably holds _size; the register
// setup lines are not visible in this view -- TODO confirm).
744 "mov r3, r1, lsr #5\n"
// One chunk: load d0-d3 (32 bytes) from [r0] with writeback.
748 "vldmia r0!, {d0-d3}\n"
755 "ldmfd sp!, {r0,r1,r2,r3}\n"
// Inputs: _buffer, _size, num_loops. Declared clobbers: r0-r2 only.
// NOTE(review): d0-d3 are overwritten by the loads but not listed as clobbers --
// confirm this is safe with the compiler in use.
756 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
761 #endif // __BANDWIDTH_H__