OSDN Git Service

ANRdaemon: move trace result from /sdcard to /data
[android-x86/system-extras.git] / tests / memtest / bandwidth.h
1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifndef __BANDWIDTH_H__
18 #define __BANDWIDTH_H__
19
20 #include <stdlib.h>
21 #include <string.h>
22
23 #include "utils/Compat.h"
24 #include "memtest.h"
25
26 // Bandwidth Class definitions.
27 class BandwidthBenchmark {
28 public:
29     BandwidthBenchmark()
30         : _size(0),
31           _num_warm_loops(DEFAULT_NUM_WARM_LOOPS),
32           _num_loops(DEFAULT_NUM_LOOPS) {}
33     virtual ~BandwidthBenchmark() {}
34
35     bool run() {
36         if (_size == 0) {
37             return false;
38         }
39         if (!canRun()) {
40             return false;
41         }
42
43         bench(_num_warm_loops);
44
45         nsecs_t t = system_time();
46         bench(_num_loops);
47         t = system_time() - t;
48
49         _mb_per_sec = (_size*(_num_loops/_BYTES_PER_MB))/(t/_NUM_NS_PER_SEC);
50
51         return true;
52     }
53
54     bool canRun() { return !usesNeon() || isNeonSupported(); }
55
56     virtual bool setSize(size_t size) = 0;
57
58     virtual const char *getName() = 0;
59
60     virtual bool verify() = 0;
61
62     virtual bool usesNeon() { return false; }
63
64     bool isNeonSupported() {
65 #if defined(__ARM_NEON__)
66         return true;
67 #else
68         return false;
69 #endif
70     }
71
72     // Accessors/mutators.
73     double mb_per_sec() { return _mb_per_sec; }
74     size_t num_warm_loops() { return _num_warm_loops; }
75     size_t num_loops() { return _num_loops; }
76     size_t size() { return _size; }
77
78     void set_num_warm_loops(size_t num_warm_loops) {
79         _num_warm_loops = num_warm_loops;
80     }
81     void set_num_loops(size_t num_loops) { _num_loops = num_loops; }
82
83     // Static constants
84     static const unsigned int DEFAULT_NUM_WARM_LOOPS = 1000000;
85     static const unsigned int DEFAULT_NUM_LOOPS = 20000000;
86
87 protected:
88     virtual void bench(size_t num_loops) = 0;
89
90     double _mb_per_sec;
91     size_t _size;
92     size_t _num_warm_loops;
93     size_t _num_loops;
94
95 private:
96     // Static constants
97     static const CONSTEXPR double _NUM_NS_PER_SEC = 1000000000.0;
98     static const CONSTEXPR double _BYTES_PER_MB = 1024.0* 1024.0;
99 };
100
101 class CopyBandwidthBenchmark : public BandwidthBenchmark {
102 public:
103     CopyBandwidthBenchmark() : BandwidthBenchmark(), _src(NULL), _dst(NULL) { }
104
105     bool setSize(size_t size) {
106         if (_src) {
107            free(_src);
108         }
109         if (_dst) {
110             free(_dst);
111         }
112
113         if (size == 0) {
114             _size = DEFAULT_COPY_SIZE;
115         } else {
116             _size = size;
117         }
118
119         _src = reinterpret_cast<char*>(memalign(64, _size));
120         if (!_src) {
121             perror("Failed to allocate memory for test.");
122             return false;
123         }
124         _dst = reinterpret_cast<char*>(memalign(64, _size));
125         if (!_dst) {
126             perror("Failed to allocate memory for test.");
127             return false;
128         }
129
130         return true;
131     }
132     virtual ~CopyBandwidthBenchmark() {
133         if (_src) {
134             free(_src);
135             _src = NULL;
136         }
137         if (_dst) {
138             free(_dst);
139             _dst = NULL;
140         }
141     }
142
143     bool verify() {
144         memset(_src, 0x23, _size);
145         memset(_dst, 0, _size);
146         bench(1);
147         if (memcmp(_src, _dst, _size) != 0) {
148             printf("Buffers failed to compare after one loop.\n");
149             return false;
150         }
151
152         memset(_src, 0x23, _size);
153         memset(_dst, 0, _size);
154         _num_loops = 2;
155         bench(2);
156         if (memcmp(_src, _dst, _size) != 0) {
157             printf("Buffers failed to compare after two loops.\n");
158             return false;
159         }
160
161         return true;
162     }
163
164 protected:
165     char *_src;
166     char *_dst;
167
168     static const unsigned int DEFAULT_COPY_SIZE = 8000;
169 };
170
171 class CopyLdrdStrdBenchmark : public CopyBandwidthBenchmark {
172 public:
173     CopyLdrdStrdBenchmark() : CopyBandwidthBenchmark() { }
174     virtual ~CopyLdrdStrdBenchmark() {}
175
176     const char *getName() { return "ldrd/strd"; }
177
178 protected:
179     // Copy using ldrd/strd instructions.
180     void bench(size_t num_loops) {
181         asm volatile(
182             "stmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
183
184             "mov r0, %0\n"
185             "mov r1, %1\n"
186             "mov r2, %2\n"
187             "mov r3, %3\n"
188
189             "0:\n"
190             "mov r4, r2, lsr #6\n"
191
192             "1:\n"
193             "ldrd r6, r7, [r0]\n"
194             "strd r6, r7, [r1]\n"
195             "ldrd r6, r7, [r0, #8]\n"
196             "strd r6, r7, [r1, #8]\n"
197             "ldrd r6, r7, [r0, #16]\n"
198             "strd r6, r7, [r1, #16]\n"
199             "ldrd r6, r7, [r0, #24]\n"
200             "strd r6, r7, [r1, #24]\n"
201             "ldrd r6, r7, [r0, #32]\n"
202             "strd r6, r7, [r1, #32]\n"
203             "ldrd r6, r7, [r0, #40]\n"
204             "strd r6, r7, [r1, #40]\n"
205             "ldrd r6, r7, [r0, #48]\n"
206             "strd r6, r7, [r1, #48]\n"
207             "ldrd r6, r7, [r0, #56]\n"
208             "strd r6, r7, [r1, #56]\n"
209
210             "add  r0, r0, #64\n"
211             "add  r1, r1, #64\n"
212             "subs r4, r4, #1\n"
213             "bgt 1b\n"
214
215             "sub r0, r0, r2\n"
216             "sub r1, r1, r2\n"
217             "subs r3, r3, #1\n"
218             "bgt 0b\n"
219
220             "ldmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
221         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
222     }
223 };
224
225 class CopyLdmiaStmiaBenchmark : public CopyBandwidthBenchmark {
226 public:
227     CopyLdmiaStmiaBenchmark() : CopyBandwidthBenchmark() { }
228     virtual ~CopyLdmiaStmiaBenchmark() {}
229
230     const char *getName() { return "ldmia/stmia"; }
231
232 protected:
233     // Copy using ldmia/stmia instructions.
234     void bench(size_t num_loops) {
235         asm volatile(
236             "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
237
238             "mov r0, %0\n"
239             "mov r1, %1\n"
240             "mov r2, %2\n"
241             "mov r3, %3\n"
242
243             "0:\n"
244             "mov r4, r2, lsr #6\n"
245
246             "1:\n"
247             "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
248             "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
249             "subs r4, r4, #1\n"
250             "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
251             "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
252             "bgt 1b\n"
253
254             "sub r0, r0, r2\n"
255             "sub r1, r1, r2\n"
256             "subs r3, r3, #1\n"
257             "bgt 0b\n"
258
259             "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
260         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
261     }
262 };
263
264 class CopyVld1Vst1Benchmark : public CopyBandwidthBenchmark {
265 public:
266     CopyVld1Vst1Benchmark() : CopyBandwidthBenchmark() { }
267     virtual ~CopyVld1Vst1Benchmark() {}
268
269     const char *getName() { return "vld1/vst1"; }
270
271     bool usesNeon() { return true; }
272
273 protected:
274     // Copy using vld1/vst1 instructions.
275     void bench(size_t num_loops) {
276 #if defined(__ARM_NEON__)
277         asm volatile(
278             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
279
280             "mov r0, %0\n"
281             "mov r1, %1\n"
282             "mov r2, %2\n"
283             "mov r3, %3\n"
284
285             "0:\n"
286             "mov r4, r2, lsr #6\n"
287
288             "1:\n"
289             "vld1.8 {d0-d3}, [r0]!\n"
290             "vld1.8 {d4-d7}, [r0]!\n"
291             "subs r4, r4, #1\n"
292             "vst1.8 {d0-d3}, [r1:128]!\n"
293             "vst1.8 {d4-d7}, [r1:128]!\n"
294             "bgt 1b\n"
295
296             "sub r0, r0, r2\n"
297             "sub r1, r1, r2\n"
298             "subs r3, r3, #1\n"
299             "bgt 0b\n"
300
301             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
302         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
303 #endif
304     }
305 };
306
307 class CopyVldrVstrBenchmark : public CopyBandwidthBenchmark {
308 public:
309     CopyVldrVstrBenchmark() : CopyBandwidthBenchmark() { }
310     virtual ~CopyVldrVstrBenchmark() {}
311
312     const char *getName() { return "vldr/vstr"; }
313
314     bool usesNeon() { return true; }
315
316 protected:
317     // Copy using vldr/vstr instructions.
318     void bench(size_t num_loops) {
319 #if defined(__ARM_NEON__)
320         asm volatile(
321             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
322
323             "mov r0, %0\n"
324             "mov r1, %1\n"
325             "mov r2, %2\n"
326             "mov r3, %3\n"
327
328             "0:\n"
329             "mov r4, r2, lsr #6\n"
330
331             "1:\n"
332             "vldr d0, [r0, #0]\n"
333             "subs r4, r4, #1\n"
334             "vldr d1, [r0, #8]\n"
335             "vstr d0, [r1, #0]\n"
336             "vldr d0, [r0, #16]\n"
337             "vstr d1, [r1, #8]\n"
338             "vldr d1, [r0, #24]\n"
339             "vstr d0, [r1, #16]\n"
340             "vldr d0, [r0, #32]\n"
341             "vstr d1, [r1, #24]\n"
342             "vldr d1, [r0, #40]\n"
343             "vstr d0, [r1, #32]\n"
344             "vldr d0, [r0, #48]\n"
345             "vstr d1, [r1, #40]\n"
346             "vldr d1, [r0, #56]\n"
347             "vstr d0, [r1, #48]\n"
348             "add r0, r0, #64\n"
349             "vstr d1, [r1, #56]\n"
350             "add r1, r1, #64\n"
351             "bgt 1b\n"
352
353             "sub r0, r0, r2\n"
354             "sub r1, r1, r2\n"
355             "subs r3, r3, #1\n"
356             "bgt 0b\n"
357
358             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
359         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
360 #endif
361     }
362 };
363
364 class CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark {
365 public:
366     CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { }
367     virtual ~CopyVldmiaVstmiaBenchmark() {}
368
369     const char *getName() { return "vldmia/vstmia"; }
370
371     bool usesNeon() { return true; }
372
373 protected:
374     // Copy using vldmia/vstmia instructions.
375     void bench(size_t num_loops) {
376 #if defined(__ARM_NEON__)
377         asm volatile(
378             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
379
380             "mov r0, %0\n"
381             "mov r1, %1\n"
382             "mov r2, %2\n"
383             "mov r3, %3\n"
384
385             "0:\n"
386             "mov r4, r2, lsr #6\n"
387
388             "1:\n"
389             "vldmia r0!, {d0-d7}\n"
390             "subs r4, r4, #1\n"
391             "vstmia r1!, {d0-d7}\n"
392             "bgt 1b\n"
393
394             "sub r0, r0, r2\n"
395             "sub r1, r1, r2\n"
396             "subs r3, r3, #1\n"
397             "bgt 0b\n"
398
399             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
400         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
401 #endif
402     }
403 };
404
405 class MemcpyBenchmark : public CopyBandwidthBenchmark {
406 public:
407     MemcpyBenchmark() : CopyBandwidthBenchmark() { }
408     virtual ~MemcpyBenchmark() {}
409
410     const char *getName() { return "memcpy"; }
411
412 protected:
413     void bench(size_t num_loops) {
414         for (size_t i = 0; i < num_loops; i++) {
415             memcpy(_dst, _src, _size);
416         }
417     }
418 };
419
420 class SingleBufferBandwidthBenchmark : public BandwidthBenchmark {
421 public:
422     SingleBufferBandwidthBenchmark() : BandwidthBenchmark(), _buffer(NULL) { }
423     virtual ~SingleBufferBandwidthBenchmark() {
424         if (_buffer) {
425             free(_buffer);
426             _buffer = NULL;
427         }
428     }
429
430     bool setSize(size_t size) {
431         if (_buffer) {
432             free(_buffer);
433             _buffer = NULL;
434         }
435
436         if (_size == 0) {
437             _size = DEFAULT_SINGLE_BUFFER_SIZE;
438         } else {
439             _size = size;
440         }
441
442         _buffer = reinterpret_cast<char*>(memalign(64, _size));
443         if (!_buffer) {
444             perror("Failed to allocate memory for test.");
445             return false;
446         }
447         memset(_buffer, 0, _size);
448
449         return true;
450     }
451
452     bool verify() { return true; }
453
454 protected:
455     char *_buffer;
456
457     static const unsigned int DEFAULT_SINGLE_BUFFER_SIZE = 16000;
458 };
459
460 class WriteBandwidthBenchmark : public SingleBufferBandwidthBenchmark {
461 public:
462     WriteBandwidthBenchmark() : SingleBufferBandwidthBenchmark() { }
463     virtual ~WriteBandwidthBenchmark() { }
464
465     bool verify() {
466         memset(_buffer, 0, _size);
467         bench(1);
468         for (size_t i = 0; i < _size; i++) {
469             if (_buffer[i] != 1) {
470                 printf("Buffer failed to compare after one loop.\n");
471                 return false;
472             }
473         }
474
475         memset(_buffer, 0, _size);
476         bench(2);
477         for (size_t i = 0; i < _size; i++) {
478             if (_buffer[i] != 2) {
479                 printf("Buffer failed to compare after two loops.\n");
480                 return false;
481             }
482         }
483
484         return true;
485     }
486 };
487
488 class WriteStrdBenchmark : public WriteBandwidthBenchmark {
489 public:
490     WriteStrdBenchmark() : WriteBandwidthBenchmark() { }
491     virtual ~WriteStrdBenchmark() {}
492
493     const char *getName() { return "strd"; }
494
495 protected:
496     // Write a given value using strd.
497     void bench(size_t num_loops) {
498         asm volatile(
499             "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
500
501             "mov r0, %0\n"
502             "mov r1, %1\n"
503             "mov r2, %2\n"
504
505             "mov r4, #0\n"
506             "mov r5, #0\n"
507
508             "0:\n"
509             "mov r3, r1, lsr #5\n"
510
511             "add r4, r4, #0x01010101\n"
512             "mov r5, r4\n"
513
514             "1:\n"
515             "subs r3, r3, #1\n"
516             "strd r4, r5, [r0]\n"
517             "strd r4, r5, [r0, #8]\n"
518             "strd r4, r5, [r0, #16]\n"
519             "strd r4, r5, [r0, #24]\n"
520             "add  r0, r0, #32\n"
521             "bgt 1b\n"
522
523             "sub r0, r0, r1\n"
524             "subs r2, r2, #1\n"
525             "bgt 0b\n"
526
527             "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
528           :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
529     }
530 };
531
532 class WriteStmiaBenchmark : public WriteBandwidthBenchmark {
533 public:
534     WriteStmiaBenchmark() : WriteBandwidthBenchmark() { }
535     virtual ~WriteStmiaBenchmark() {}
536
537     const char *getName() { return "stmia"; }
538
539 protected:
540       // Write a given value using stmia.
541       void bench(size_t num_loops) {
542           asm volatile(
543               "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
544
545               "mov r0, %0\n"
546               "mov r1, %1\n"
547               "mov r2, %2\n"
548
549               "mov r4, #0\n"
550
551               "0:\n"
552               "mov r3, r1, lsr #5\n"
553
554               "add r4, r4, #0x01010101\n"
555               "mov r5, r4\n"
556               "mov r6, r4\n"
557               "mov r7, r4\n"
558               "mov r8, r4\n"
559               "mov r9, r4\n"
560               "mov r10, r4\n"
561               "mov r11, r4\n"
562
563               "1:\n"
564               "subs r3, r3, #1\n"
565               "stmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
566               "bgt 1b\n"
567
568               "sub r0, r0, r1\n"
569               "subs r2, r2, #1\n"
570               "bgt 0b\n"
571
572               "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
573         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
574     }
575 };
576
577 class WriteVst1Benchmark : public WriteBandwidthBenchmark {
578 public:
579     WriteVst1Benchmark() : WriteBandwidthBenchmark() { }
580     virtual ~WriteVst1Benchmark() {}
581
582     const char *getName() { return "vst1"; }
583
584     bool usesNeon() { return true; }
585
586 protected:
587     // Write a given value using vst.
588     void bench(size_t num_loops) {
589 #if defined(__ARM_NEON__)
590         asm volatile(
591             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
592
593             "mov r0, %0\n"
594             "mov r1, %1\n"
595             "mov r2, %2\n"
596             "mov r4, #0\n"
597
598             "0:\n"
599             "mov r3, r1, lsr #5\n"
600
601             "add r4, r4, #1\n"
602             "vdup.8 d0, r4\n"
603             "vmov d1, d0\n"
604             "vmov d2, d0\n"
605             "vmov d3, d0\n"
606
607             "1:\n"
608             "subs r3, r3, #1\n"
609             "vst1.8 {d0-d3}, [r0:128]!\n"
610             "bgt 1b\n"
611
612             "sub r0, r0, r1\n"
613             "subs r2, r2, #1\n"
614             "bgt 0b\n"
615
616             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
617         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
618 #endif
619     }
620 };
621
622 class WriteVstrBenchmark : public WriteBandwidthBenchmark {
623 public:
624     WriteVstrBenchmark() : WriteBandwidthBenchmark() { }
625     virtual ~WriteVstrBenchmark() {}
626
627     const char *getName() { return "vstr"; }
628
629     bool usesNeon() { return true; }
630
631 protected:
632     // Write a given value using vst.
633     void bench(size_t num_loops) {
634 #if defined(__ARM_NEON__)
635         asm volatile(
636             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
637
638             "mov r0, %0\n"
639             "mov r1, %1\n"
640             "mov r2, %2\n"
641             "mov r4, #0\n"
642
643             "0:\n"
644             "mov r3, r1, lsr #5\n"
645
646             "add r4, r4, #1\n"
647             "vdup.8 d0, r4\n"
648             "vmov d1, d0\n"
649             "vmov d2, d0\n"
650             "vmov d3, d0\n"
651
652             "1:\n"
653             "vstr d0, [r0, #0]\n"
654             "subs r3, r3, #1\n"
655             "vstr d1, [r0, #8]\n"
656             "vstr d0, [r0, #16]\n"
657             "vstr d1, [r0, #24]\n"
658             "add r0, r0, #32\n"
659             "bgt 1b\n"
660
661             "sub r0, r0, r1\n"
662             "subs r2, r2, #1\n"
663             "bgt 0b\n"
664
665             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
666         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
667 #endif
668     }
669 };
670
671 class WriteVstmiaBenchmark : public WriteBandwidthBenchmark {
672 public:
673     WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { }
674     virtual ~WriteVstmiaBenchmark() {}
675
676     const char *getName() { return "vstmia"; }
677
678     bool usesNeon() { return true; }
679
680 protected:
681     // Write a given value using vstmia.
682     void bench(size_t num_loops) {
683 #if defined(__ARM_NEON__)
684         asm volatile(
685             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
686
687             "mov r0, %0\n"
688             "mov r1, %1\n"
689             "mov r2, %2\n"
690             "mov r4, #0\n"
691
692             "0:\n"
693             "mov r3, r1, lsr #5\n"
694
695             "add r4, r4, #1\n"
696             "vdup.8 d0, r4\n"
697             "vmov d1, d0\n"
698             "vmov d2, d0\n"
699             "vmov d3, d0\n"
700
701             "1:\n"
702             "subs r3, r3, #1\n"
703             "vstmia r0!, {d0-d3}\n"
704             "bgt 1b\n"
705
706             "sub r0, r0, r1\n"
707             "subs r2, r2, #1\n"
708             "bgt 0b\n"
709
710             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
711         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
712 #endif
713     }
714 };
715
716 class MemsetBenchmark : public WriteBandwidthBenchmark {
717 public:
718     MemsetBenchmark() : WriteBandwidthBenchmark() { }
719     virtual ~MemsetBenchmark() {}
720
721     const char *getName() { return "memset"; }
722
723 protected:
724     void bench(size_t num_loops) {
725         for (size_t i = 0; i < num_loops; i++) {
726             memset(_buffer, (i % 255) + 1, _size);
727         }
728     }
729 };
730
731 class ReadLdrdBenchmark : public SingleBufferBandwidthBenchmark {
732 public:
733     ReadLdrdBenchmark() : SingleBufferBandwidthBenchmark() { }
734     virtual ~ReadLdrdBenchmark() {}
735
736     const char *getName() { return "ldrd"; }
737
738 protected:
739     // Write a given value using strd.
740     void bench(size_t num_loops) {
741         asm volatile(
742             "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
743
744             "mov r0, %0\n"
745             "mov r1, %1\n"
746             "mov r2, %2\n"
747
748             "0:\n"
749             "mov r3, r1, lsr #5\n"
750
751             "1:\n"
752             "subs r3, r3, #1\n"
753             "ldrd r4, r5, [r0]\n"
754             "ldrd r4, r5, [r0, #8]\n"
755             "ldrd r4, r5, [r0, #16]\n"
756             "ldrd r4, r5, [r0, #24]\n"
757             "add  r0, r0, #32\n"
758             "bgt 1b\n"
759
760             "sub r0, r0, r1\n"
761             "subs r2, r2, #1\n"
762             "bgt 0b\n"
763
764             "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
765           :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
766     }
767 };
768
769 class ReadLdmiaBenchmark : public SingleBufferBandwidthBenchmark {
770 public:
771     ReadLdmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
772     virtual ~ReadLdmiaBenchmark() {}
773
774     const char *getName() { return "ldmia"; }
775
776 protected:
777       // Write a given value using stmia.
778       void bench(size_t num_loops) {
779           asm volatile(
780               "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
781
782               "mov r0, %0\n"
783               "mov r1, %1\n"
784               "mov r2, %2\n"
785
786               "0:\n"
787               "mov r3, r1, lsr #5\n"
788
789               "1:\n"
790               "subs r3, r3, #1\n"
791               "ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
792               "bgt 1b\n"
793
794               "sub r0, r0, r1\n"
795               "subs r2, r2, #1\n"
796               "bgt 0b\n"
797
798               "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
799         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
800     }
801 };
802
803 class ReadVld1Benchmark : public SingleBufferBandwidthBenchmark {
804 public:
805     ReadVld1Benchmark() : SingleBufferBandwidthBenchmark() { }
806     virtual ~ReadVld1Benchmark() {}
807
808     const char *getName() { return "vld1"; }
809
810     bool usesNeon() { return true; }
811
812 protected:
813     // Write a given value using vst.
814     void bench(size_t num_loops) {
815 #if defined(__ARM_NEON__)
816         asm volatile(
817             "stmfd sp!, {r0,r1,r2,r3}\n"
818
819             "mov r0, %0\n"
820             "mov r1, %1\n"
821             "mov r2, %2\n"
822
823             "0:\n"
824             "mov r3, r1, lsr #5\n"
825
826             "1:\n"
827             "subs r3, r3, #1\n"
828             "vld1.8 {d0-d3}, [r0:128]!\n"
829             "bgt 1b\n"
830
831             "sub r0, r0, r1\n"
832             "subs r2, r2, #1\n"
833             "bgt 0b\n"
834
835             "ldmfd sp!, {r0,r1,r2,r3}\n"
836         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
837 #endif
838     }
839 };
840
841 class ReadVldrBenchmark : public SingleBufferBandwidthBenchmark {
842 public:
843     ReadVldrBenchmark() : SingleBufferBandwidthBenchmark() { }
844     virtual ~ReadVldrBenchmark() {}
845
846     const char *getName() { return "vldr"; }
847
848     bool usesNeon() { return true; }
849
850 protected:
851     // Write a given value using vst.
852     void bench(size_t num_loops) {
853 #if defined(__ARM_NEON__)
854         asm volatile(
855             "stmfd sp!, {r0,r1,r2,r3}\n"
856
857             "mov r0, %0\n"
858             "mov r1, %1\n"
859             "mov r2, %2\n"
860
861             "0:\n"
862             "mov r3, r1, lsr #5\n"
863
864             "1:\n"
865             "vldr d0, [r0, #0]\n"
866             "subs r3, r3, #1\n"
867             "vldr d1, [r0, #8]\n"
868             "vldr d0, [r0, #16]\n"
869             "vldr d1, [r0, #24]\n"
870             "add r0, r0, #32\n"
871             "bgt 1b\n"
872
873             "sub r0, r0, r1\n"
874             "subs r2, r2, #1\n"
875             "bgt 0b\n"
876
877             "ldmfd sp!, {r0,r1,r2,r3}\n"
878         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
879 #endif
880     }
881 };
882
883
884 class ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark {
885 public:
886     ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
887     virtual ~ReadVldmiaBenchmark() {}
888
889     const char *getName() { return "vldmia"; }
890
891     bool usesNeon() { return true; }
892
893 protected:
894     // Write a given value using vstmia.
895     void bench(size_t num_loops) {
896 #if defined(__ARM_NEON__)
897         asm volatile(
898             "stmfd sp!, {r0,r1,r2,r3}\n"
899
900             "mov r0, %0\n"
901             "mov r1, %1\n"
902             "mov r2, %2\n"
903
904             "0:\n"
905             "mov r3, r1, lsr #5\n"
906
907             "1:\n"
908             "subs r3, r3, #1\n"
909             "vldmia r0!, {d0-d3}\n"
910             "bgt 1b\n"
911
912             "sub r0, r0, r1\n"
913             "subs r2, r2, #1\n"
914             "bgt 0b\n"
915
916             "ldmfd sp!, {r0,r1,r2,r3}\n"
917         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
918 #endif
919     }
920 };
921
922 #endif  // __BANDWIDTH_H__