OSDN Git Service

Merge "More C++11 compatibility."
[android-x86/system-extras.git] / tests / memtest / bandwidth.h
1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifndef __BANDWIDTH_H__
18 #define __BANDWIDTH_H__
19
20 #include <stdlib.h>
21
22 #include "utils/Compat.h"
23 #include "memtest.h"
24
25 // Bandwidth Class definitions.
26 class BandwidthBenchmark {
27 public:
28     BandwidthBenchmark()
29         : _size(0),
30           _num_warm_loops(DEFAULT_NUM_WARM_LOOPS),
31           _num_loops(DEFAULT_NUM_LOOPS) {}
32     virtual ~BandwidthBenchmark() {}
33
34     bool run() {
35         if (_size == 0) {
36             return false;
37         }
38         if (!canRun()) {
39             return false;
40         }
41
42         bench(_num_warm_loops);
43
44         nsecs_t t = system_time();
45         bench(_num_loops);
46         t = system_time() - t;
47
48         _mb_per_sec = (_size*(_num_loops/_BYTES_PER_MB))/(t/_NUM_NS_PER_SEC);
49
50         return true;
51     }
52
53     bool canRun() { return !usesNeon() || isNeonSupported(); }
54
55     virtual bool setSize(size_t size) = 0;
56
57     virtual const char *getName() = 0;
58
59     virtual bool verify() = 0;
60
61     virtual bool usesNeon() { return false; }
62
63     bool isNeonSupported() {
64 #if defined(__ARM_NEON__)
65         return true;
66 #else
67         return false;
68 #endif
69     }
70
71     // Accessors/mutators.
72     double mb_per_sec() { return _mb_per_sec; }
73     size_t num_warm_loops() { return _num_warm_loops; }
74     size_t num_loops() { return _num_loops; }
75     size_t size() { return _size; }
76
77     void set_num_warm_loops(size_t num_warm_loops) {
78         _num_warm_loops = num_warm_loops;
79     }
80     void set_num_loops(size_t num_loops) { _num_loops = num_loops; }
81
82     // Static constants
83     static const unsigned int DEFAULT_NUM_WARM_LOOPS = 1000000;
84     static const unsigned int DEFAULT_NUM_LOOPS = 20000000;
85
86 protected:
87     virtual void bench(size_t num_loops) = 0;
88
89     double _mb_per_sec;
90     size_t _size;
91     size_t _num_warm_loops;
92     size_t _num_loops;
93
94 private:
95     // Static constants
96     static const CONSTEXPR double _NUM_NS_PER_SEC = 1000000000.0;
97     static const CONSTEXPR double _BYTES_PER_MB = 1024.0* 1024.0;
98 };
99
100 class CopyBandwidthBenchmark : public BandwidthBenchmark {
101 public:
102     CopyBandwidthBenchmark() : BandwidthBenchmark(), _src(NULL), _dst(NULL) { }
103
104     bool setSize(size_t size) {
105         if (_src) {
106            free(_src);
107         }
108         if (_dst) {
109             free(_dst);
110         }
111
112         if (size == 0) {
113             _size = DEFAULT_COPY_SIZE;
114         } else {
115             _size = size;
116         }
117
118         _src = reinterpret_cast<char*>(memalign(64, _size));
119         if (!_src) {
120             perror("Failed to allocate memory for test.");
121             return false;
122         }
123         _dst = reinterpret_cast<char*>(memalign(64, _size));
124         if (!_dst) {
125             perror("Failed to allocate memory for test.");
126             return false;
127         }
128
129         return true;
130     }
131     virtual ~CopyBandwidthBenchmark() {
132         if (_src) {
133             free(_src);
134             _src = NULL;
135         }
136         if (_dst) {
137             free(_dst);
138             _dst = NULL;
139         }
140     }
141
142     bool verify() {
143         memset(_src, 0x23, _size);
144         memset(_dst, 0, _size);
145         bench(1);
146         if (memcmp(_src, _dst, _size) != 0) {
147             printf("Buffers failed to compare after one loop.\n");
148             return false;
149         }
150
151         memset(_src, 0x23, _size);
152         memset(_dst, 0, _size);
153         _num_loops = 2;
154         bench(2);
155         if (memcmp(_src, _dst, _size) != 0) {
156             printf("Buffers failed to compare after two loops.\n");
157             return false;
158         }
159
160         return true;
161     }
162
163 protected:
164     char *_src;
165     char *_dst;
166
167     static const unsigned int DEFAULT_COPY_SIZE = 8000;
168 };
169
170 class CopyLdrdStrdBenchmark : public CopyBandwidthBenchmark {
171 public:
172     CopyLdrdStrdBenchmark() : CopyBandwidthBenchmark() { }
173     virtual ~CopyLdrdStrdBenchmark() {}
174
175     const char *getName() { return "ldrd/strd"; }
176
177 protected:
178     // Copy using ldrd/strd instructions.
179     void bench(size_t num_loops) {
180         asm volatile(
181             "stmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
182
183             "mov r0, %0\n"
184             "mov r1, %1\n"
185             "mov r2, %2\n"
186             "mov r3, %3\n"
187
188             "0:\n"
189             "mov r4, r2, lsr #6\n"
190
191             "1:\n"
192             "ldrd r6, r7, [r0]\n"
193             "strd r6, r7, [r1]\n"
194             "ldrd r6, r7, [r0, #8]\n"
195             "strd r6, r7, [r1, #8]\n"
196             "ldrd r6, r7, [r0, #16]\n"
197             "strd r6, r7, [r1, #16]\n"
198             "ldrd r6, r7, [r0, #24]\n"
199             "strd r6, r7, [r1, #24]\n"
200             "ldrd r6, r7, [r0, #32]\n"
201             "strd r6, r7, [r1, #32]\n"
202             "ldrd r6, r7, [r0, #40]\n"
203             "strd r6, r7, [r1, #40]\n"
204             "ldrd r6, r7, [r0, #48]\n"
205             "strd r6, r7, [r1, #48]\n"
206             "ldrd r6, r7, [r0, #56]\n"
207             "strd r6, r7, [r1, #56]\n"
208
209             "add  r0, r0, #64\n"
210             "add  r1, r1, #64\n"
211             "subs r4, r4, #1\n"
212             "bgt 1b\n"
213
214             "sub r0, r0, r2\n"
215             "sub r1, r1, r2\n"
216             "subs r3, r3, #1\n"
217             "bgt 0b\n"
218
219             "ldmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
220         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
221     }
222 };
223
224 class CopyLdmiaStmiaBenchmark : public CopyBandwidthBenchmark {
225 public:
226     CopyLdmiaStmiaBenchmark() : CopyBandwidthBenchmark() { }
227     virtual ~CopyLdmiaStmiaBenchmark() {}
228
229     const char *getName() { return "ldmia/stmia"; }
230
231 protected:
232     // Copy using ldmia/stmia instructions.
233     void bench(size_t num_loops) {
234         asm volatile(
235             "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
236
237             "mov r0, %0\n"
238             "mov r1, %1\n"
239             "mov r2, %2\n"
240             "mov r3, %3\n"
241
242             "0:\n"
243             "mov r4, r2, lsr #6\n"
244
245             "1:\n"
246             "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
247             "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
248             "subs r4, r4, #1\n"
249             "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
250             "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
251             "bgt 1b\n"
252
253             "sub r0, r0, r2\n"
254             "sub r1, r1, r2\n"
255             "subs r3, r3, #1\n"
256             "bgt 0b\n"
257
258             "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
259         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
260     }
261 };
262
263 class CopyVld1Vst1Benchmark : public CopyBandwidthBenchmark {
264 public:
265     CopyVld1Vst1Benchmark() : CopyBandwidthBenchmark() { }
266     virtual ~CopyVld1Vst1Benchmark() {}
267
268     const char *getName() { return "vld1/vst1"; }
269
270     bool usesNeon() { return true; }
271
272 protected:
273     // Copy using vld1/vst1 instructions.
274     void bench(size_t num_loops) {
275 #if defined(__ARM_NEON__)
276         asm volatile(
277             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
278
279             "mov r0, %0\n"
280             "mov r1, %1\n"
281             "mov r2, %2\n"
282             "mov r3, %3\n"
283
284             "0:\n"
285             "mov r4, r2, lsr #6\n"
286
287             "1:\n"
288             "vld1.8 {d0-d3}, [r0]!\n"
289             "vld1.8 {d4-d7}, [r0]!\n"
290             "subs r4, r4, #1\n"
291             "vst1.8 {d0-d3}, [r1:128]!\n"
292             "vst1.8 {d4-d7}, [r1:128]!\n"
293             "bgt 1b\n"
294
295             "sub r0, r0, r2\n"
296             "sub r1, r1, r2\n"
297             "subs r3, r3, #1\n"
298             "bgt 0b\n"
299
300             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
301         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
302 #endif
303     }
304 };
305
306 class CopyVldrVstrBenchmark : public CopyBandwidthBenchmark {
307 public:
308     CopyVldrVstrBenchmark() : CopyBandwidthBenchmark() { }
309     virtual ~CopyVldrVstrBenchmark() {}
310
311     const char *getName() { return "vldr/vstr"; }
312
313     bool usesNeon() { return true; }
314
315 protected:
316     // Copy using vldr/vstr instructions.
317     void bench(size_t num_loops) {
318 #if defined(__ARM_NEON__)
319         asm volatile(
320             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
321
322             "mov r0, %0\n"
323             "mov r1, %1\n"
324             "mov r2, %2\n"
325             "mov r3, %3\n"
326
327             "0:\n"
328             "mov r4, r2, lsr #6\n"
329
330             "1:\n"
331             "vldr d0, [r0, #0]\n"
332             "subs r4, r4, #1\n"
333             "vldr d1, [r0, #8]\n"
334             "vstr d0, [r1, #0]\n"
335             "vldr d0, [r0, #16]\n"
336             "vstr d1, [r1, #8]\n"
337             "vldr d1, [r0, #24]\n"
338             "vstr d0, [r1, #16]\n"
339             "vldr d0, [r0, #32]\n"
340             "vstr d1, [r1, #24]\n"
341             "vldr d1, [r0, #40]\n"
342             "vstr d0, [r1, #32]\n"
343             "vldr d0, [r0, #48]\n"
344             "vstr d1, [r1, #40]\n"
345             "vldr d1, [r0, #56]\n"
346             "vstr d0, [r1, #48]\n"
347             "add r0, r0, #64\n"
348             "vstr d1, [r1, #56]\n"
349             "add r1, r1, #64\n"
350             "bgt 1b\n"
351
352             "sub r0, r0, r2\n"
353             "sub r1, r1, r2\n"
354             "subs r3, r3, #1\n"
355             "bgt 0b\n"
356
357             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
358         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
359 #endif
360     }
361 };
362
363 class CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark {
364 public:
365     CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { }
366     virtual ~CopyVldmiaVstmiaBenchmark() {}
367
368     const char *getName() { return "vldmia/vstmia"; }
369
370     bool usesNeon() { return true; }
371
372 protected:
373     // Copy using vldmia/vstmia instructions.
374     void bench(size_t num_loops) {
375 #if defined(__ARM_NEON__)
376         asm volatile(
377             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
378
379             "mov r0, %0\n"
380             "mov r1, %1\n"
381             "mov r2, %2\n"
382             "mov r3, %3\n"
383
384             "0:\n"
385             "mov r4, r2, lsr #6\n"
386
387             "1:\n"
388             "vldmia r0!, {d0-d7}\n"
389             "subs r4, r4, #1\n"
390             "vstmia r1!, {d0-d7}\n"
391             "bgt 1b\n"
392
393             "sub r0, r0, r2\n"
394             "sub r1, r1, r2\n"
395             "subs r3, r3, #1\n"
396             "bgt 0b\n"
397
398             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
399         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
400 #endif
401     }
402 };
403
404 class MemcpyBenchmark : public CopyBandwidthBenchmark {
405 public:
406     MemcpyBenchmark() : CopyBandwidthBenchmark() { }
407     virtual ~MemcpyBenchmark() {}
408
409     const char *getName() { return "memcpy"; }
410
411 protected:
412     void bench(size_t num_loops) {
413         for (size_t i = 0; i < num_loops; i++) {
414             memcpy(_dst, _src, _size);
415         }
416     }
417 };
418
419 class SingleBufferBandwidthBenchmark : public BandwidthBenchmark {
420 public:
421     SingleBufferBandwidthBenchmark() : BandwidthBenchmark(), _buffer(NULL) { }
422     virtual ~SingleBufferBandwidthBenchmark() {
423         if (_buffer) {
424             free(_buffer);
425             _buffer = NULL;
426         }
427     }
428
429     bool setSize(size_t size) {
430         if (_buffer) {
431             free(_buffer);
432             _buffer = NULL;
433         }
434
435         if (_size == 0) {
436             _size = DEFAULT_SINGLE_BUFFER_SIZE;
437         } else {
438             _size = size;
439         }
440
441         _buffer = reinterpret_cast<char*>(memalign(64, _size));
442         if (!_buffer) {
443             perror("Failed to allocate memory for test.");
444             return false;
445         }
446         memset(_buffer, 0, _size);
447
448         return true;
449     }
450
451     bool verify() { return true; }
452
453 protected:
454     char *_buffer;
455
456     static const unsigned int DEFAULT_SINGLE_BUFFER_SIZE = 16000;
457 };
458
459 class WriteBandwidthBenchmark : public SingleBufferBandwidthBenchmark {
460 public:
461     WriteBandwidthBenchmark() : SingleBufferBandwidthBenchmark() { }
462     virtual ~WriteBandwidthBenchmark() { }
463
464     bool verify() {
465         memset(_buffer, 0, _size);
466         bench(1);
467         for (size_t i = 0; i < _size; i++) {
468             if (_buffer[i] != 1) {
469                 printf("Buffer failed to compare after one loop.\n");
470                 return false;
471             }
472         }
473
474         memset(_buffer, 0, _size);
475         bench(2);
476         for (size_t i = 0; i < _size; i++) {
477             if (_buffer[i] != 2) {
478                 printf("Buffer failed to compare after two loops.\n");
479                 return false;
480             }
481         }
482
483         return true;
484     }
485 };
486
487 class WriteStrdBenchmark : public WriteBandwidthBenchmark {
488 public:
489     WriteStrdBenchmark() : WriteBandwidthBenchmark() { }
490     virtual ~WriteStrdBenchmark() {}
491
492     const char *getName() { return "strd"; }
493
494 protected:
495     // Write a given value using strd.
496     void bench(size_t num_loops) {
497         asm volatile(
498             "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
499
500             "mov r0, %0\n"
501             "mov r1, %1\n"
502             "mov r2, %2\n"
503
504             "mov r4, #0\n"
505             "mov r5, #0\n"
506
507             "0:\n"
508             "mov r3, r1, lsr #5\n"
509
510             "add r4, r4, #0x01010101\n"
511             "mov r5, r4\n"
512
513             "1:\n"
514             "subs r3, r3, #1\n"
515             "strd r4, r5, [r0]\n"
516             "strd r4, r5, [r0, #8]\n"
517             "strd r4, r5, [r0, #16]\n"
518             "strd r4, r5, [r0, #24]\n"
519             "add  r0, r0, #32\n"
520             "bgt 1b\n"
521
522             "sub r0, r0, r1\n"
523             "subs r2, r2, #1\n"
524             "bgt 0b\n"
525
526             "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
527           :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
528     }
529 };
530
531 class WriteStmiaBenchmark : public WriteBandwidthBenchmark {
532 public:
533     WriteStmiaBenchmark() : WriteBandwidthBenchmark() { }
534     virtual ~WriteStmiaBenchmark() {}
535
536     const char *getName() { return "stmia"; }
537
538 protected:
539       // Write a given value using stmia.
540       void bench(size_t num_loops) {
541           asm volatile(
542               "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
543
544               "mov r0, %0\n"
545               "mov r1, %1\n"
546               "mov r2, %2\n"
547
548               "mov r4, #0\n"
549
550               "0:\n"
551               "mov r3, r1, lsr #5\n"
552
553               "add r4, r4, #0x01010101\n"
554               "mov r5, r4\n"
555               "mov r6, r4\n"
556               "mov r7, r4\n"
557               "mov r8, r4\n"
558               "mov r9, r4\n"
559               "mov r10, r4\n"
560               "mov r11, r4\n"
561
562               "1:\n"
563               "subs r3, r3, #1\n"
564               "stmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
565               "bgt 1b\n"
566
567               "sub r0, r0, r1\n"
568               "subs r2, r2, #1\n"
569               "bgt 0b\n"
570
571               "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
572         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
573     }
574 };
575
576 class WriteVst1Benchmark : public WriteBandwidthBenchmark {
577 public:
578     WriteVst1Benchmark() : WriteBandwidthBenchmark() { }
579     virtual ~WriteVst1Benchmark() {}
580
581     const char *getName() { return "vst1"; }
582
583     bool usesNeon() { return true; }
584
585 protected:
586     // Write a given value using vst.
587     void bench(size_t num_loops) {
588 #if defined(__ARM_NEON__)
589         asm volatile(
590             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
591
592             "mov r0, %0\n"
593             "mov r1, %1\n"
594             "mov r2, %2\n"
595             "mov r4, #0\n"
596
597             "0:\n"
598             "mov r3, r1, lsr #5\n"
599
600             "add r4, r4, #1\n"
601             "vdup.8 d0, r4\n"
602             "vmov d1, d0\n"
603             "vmov d2, d0\n"
604             "vmov d3, d0\n"
605
606             "1:\n"
607             "subs r3, r3, #1\n"
608             "vst1.8 {d0-d3}, [r0:128]!\n"
609             "bgt 1b\n"
610
611             "sub r0, r0, r1\n"
612             "subs r2, r2, #1\n"
613             "bgt 0b\n"
614
615             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
616         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
617 #endif
618     }
619 };
620
621 class WriteVstrBenchmark : public WriteBandwidthBenchmark {
622 public:
623     WriteVstrBenchmark() : WriteBandwidthBenchmark() { }
624     virtual ~WriteVstrBenchmark() {}
625
626     const char *getName() { return "vstr"; }
627
628     bool usesNeon() { return true; }
629
630 protected:
631     // Write a given value using vst.
632     void bench(size_t num_loops) {
633 #if defined(__ARM_NEON__)
634         asm volatile(
635             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
636
637             "mov r0, %0\n"
638             "mov r1, %1\n"
639             "mov r2, %2\n"
640             "mov r4, #0\n"
641
642             "0:\n"
643             "mov r3, r1, lsr #5\n"
644
645             "add r4, r4, #1\n"
646             "vdup.8 d0, r4\n"
647             "vmov d1, d0\n"
648             "vmov d2, d0\n"
649             "vmov d3, d0\n"
650
651             "1:\n"
652             "vstr d0, [r0, #0]\n"
653             "subs r3, r3, #1\n"
654             "vstr d1, [r0, #8]\n"
655             "vstr d0, [r0, #16]\n"
656             "vstr d1, [r0, #24]\n"
657             "add r0, r0, #32\n"
658             "bgt 1b\n"
659
660             "sub r0, r0, r1\n"
661             "subs r2, r2, #1\n"
662             "bgt 0b\n"
663
664             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
665         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
666 #endif
667     }
668 };
669
670 class WriteVstmiaBenchmark : public WriteBandwidthBenchmark {
671 public:
672     WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { }
673     virtual ~WriteVstmiaBenchmark() {}
674
675     const char *getName() { return "vstmia"; }
676
677     bool usesNeon() { return true; }
678
679 protected:
680     // Write a given value using vstmia.
681     void bench(size_t num_loops) {
682 #if defined(__ARM_NEON__)
683         asm volatile(
684             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
685
686             "mov r0, %0\n"
687             "mov r1, %1\n"
688             "mov r2, %2\n"
689             "mov r4, #0\n"
690
691             "0:\n"
692             "mov r3, r1, lsr #5\n"
693
694             "add r4, r4, #1\n"
695             "vdup.8 d0, r4\n"
696             "vmov d1, d0\n"
697             "vmov d2, d0\n"
698             "vmov d3, d0\n"
699
700             "1:\n"
701             "subs r3, r3, #1\n"
702             "vstmia r0!, {d0-d3}\n"
703             "bgt 1b\n"
704
705             "sub r0, r0, r1\n"
706             "subs r2, r2, #1\n"
707             "bgt 0b\n"
708
709             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
710         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
711 #endif
712     }
713 };
714
715 class MemsetBenchmark : public WriteBandwidthBenchmark {
716 public:
717     MemsetBenchmark() : WriteBandwidthBenchmark() { }
718     virtual ~MemsetBenchmark() {}
719
720     const char *getName() { return "memset"; }
721
722 protected:
723     void bench(size_t num_loops) {
724         for (size_t i = 0; i < num_loops; i++) {
725             memset(_buffer, (i % 255) + 1, _size);
726         }
727     }
728 };
729
730 class ReadLdrdBenchmark : public SingleBufferBandwidthBenchmark {
731 public:
732     ReadLdrdBenchmark() : SingleBufferBandwidthBenchmark() { }
733     virtual ~ReadLdrdBenchmark() {}
734
735     const char *getName() { return "ldrd"; }
736
737 protected:
738     // Write a given value using strd.
739     void bench(size_t num_loops) {
740         asm volatile(
741             "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
742
743             "mov r0, %0\n"
744             "mov r1, %1\n"
745             "mov r2, %2\n"
746
747             "0:\n"
748             "mov r3, r1, lsr #5\n"
749
750             "1:\n"
751             "subs r3, r3, #1\n"
752             "ldrd r4, r5, [r0]\n"
753             "ldrd r4, r5, [r0, #8]\n"
754             "ldrd r4, r5, [r0, #16]\n"
755             "ldrd r4, r5, [r0, #24]\n"
756             "add  r0, r0, #32\n"
757             "bgt 1b\n"
758
759             "sub r0, r0, r1\n"
760             "subs r2, r2, #1\n"
761             "bgt 0b\n"
762
763             "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
764           :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
765     }
766 };
767
768 class ReadLdmiaBenchmark : public SingleBufferBandwidthBenchmark {
769 public:
770     ReadLdmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
771     virtual ~ReadLdmiaBenchmark() {}
772
773     const char *getName() { return "ldmia"; }
774
775 protected:
776       // Write a given value using stmia.
777       void bench(size_t num_loops) {
778           asm volatile(
779               "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
780
781               "mov r0, %0\n"
782               "mov r1, %1\n"
783               "mov r2, %2\n"
784
785               "0:\n"
786               "mov r3, r1, lsr #5\n"
787
788               "1:\n"
789               "subs r3, r3, #1\n"
790               "ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
791               "bgt 1b\n"
792
793               "sub r0, r0, r1\n"
794               "subs r2, r2, #1\n"
795               "bgt 0b\n"
796
797               "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
798         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
799     }
800 };
801
802 class ReadVld1Benchmark : public SingleBufferBandwidthBenchmark {
803 public:
804     ReadVld1Benchmark() : SingleBufferBandwidthBenchmark() { }
805     virtual ~ReadVld1Benchmark() {}
806
807     const char *getName() { return "vld1"; }
808
809     bool usesNeon() { return true; }
810
811 protected:
812     // Write a given value using vst.
813     void bench(size_t num_loops) {
814 #if defined(__ARM_NEON__)
815         asm volatile(
816             "stmfd sp!, {r0,r1,r2,r3}\n"
817
818             "mov r0, %0\n"
819             "mov r1, %1\n"
820             "mov r2, %2\n"
821
822             "0:\n"
823             "mov r3, r1, lsr #5\n"
824
825             "1:\n"
826             "subs r3, r3, #1\n"
827             "vld1.8 {d0-d3}, [r0:128]!\n"
828             "bgt 1b\n"
829
830             "sub r0, r0, r1\n"
831             "subs r2, r2, #1\n"
832             "bgt 0b\n"
833
834             "ldmfd sp!, {r0,r1,r2,r3}\n"
835         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
836 #endif
837     }
838 };
839
840 class ReadVldrBenchmark : public SingleBufferBandwidthBenchmark {
841 public:
842     ReadVldrBenchmark() : SingleBufferBandwidthBenchmark() { }
843     virtual ~ReadVldrBenchmark() {}
844
845     const char *getName() { return "vldr"; }
846
847     bool usesNeon() { return true; }
848
849 protected:
850     // Write a given value using vst.
851     void bench(size_t num_loops) {
852 #if defined(__ARM_NEON__)
853         asm volatile(
854             "stmfd sp!, {r0,r1,r2,r3}\n"
855
856             "mov r0, %0\n"
857             "mov r1, %1\n"
858             "mov r2, %2\n"
859
860             "0:\n"
861             "mov r3, r1, lsr #5\n"
862
863             "1:\n"
864             "vldr d0, [r0, #0]\n"
865             "subs r3, r3, #1\n"
866             "vldr d1, [r0, #8]\n"
867             "vldr d0, [r0, #16]\n"
868             "vldr d1, [r0, #24]\n"
869             "add r0, r0, #32\n"
870             "bgt 1b\n"
871
872             "sub r0, r0, r1\n"
873             "subs r2, r2, #1\n"
874             "bgt 0b\n"
875
876             "ldmfd sp!, {r0,r1,r2,r3}\n"
877         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
878 #endif
879     }
880 };
881
882
883 class ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark {
884 public:
885     ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
886     virtual ~ReadVldmiaBenchmark() {}
887
888     const char *getName() { return "vldmia"; }
889
890     bool usesNeon() { return true; }
891
892 protected:
893     // Write a given value using vstmia.
894     void bench(size_t num_loops) {
895 #if defined(__ARM_NEON__)
896         asm volatile(
897             "stmfd sp!, {r0,r1,r2,r3}\n"
898
899             "mov r0, %0\n"
900             "mov r1, %1\n"
901             "mov r2, %2\n"
902
903             "0:\n"
904             "mov r3, r1, lsr #5\n"
905
906             "1:\n"
907             "subs r3, r3, #1\n"
908             "vldmia r0!, {d0-d3}\n"
909             "bgt 1b\n"
910
911             "sub r0, r0, r1\n"
912             "subs r2, r2, #1\n"
913             "bgt 0b\n"
914
915             "ldmfd sp!, {r0,r1,r2,r3}\n"
916         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
917 #endif
918     }
919 };
920
921 #endif  // __BANDWIDTH_H__