OSDN Git Service

mksquashfsimge.sh: Support creating a sparse image
[android-x86/system-extras.git] / tests / memtest / bandwidth.h
1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifndef __BANDWIDTH_H__
18 #define __BANDWIDTH_H__
19
20 #include "memtest.h"
21
22 // Bandwidth Class definitions.
23 class BandwidthBenchmark {
24 public:
25     BandwidthBenchmark()
26         : _size(0),
27           _num_warm_loops(DEFAULT_NUM_WARM_LOOPS),
28           _num_loops(DEFAULT_NUM_LOOPS) {}
29     virtual ~BandwidthBenchmark() {}
30
31     bool run() {
32         if (_size == 0) {
33             return false;
34         }
35         if (!canRun()) {
36             return false;
37         }
38
39         bench(_num_warm_loops);
40
41         nsecs_t t = system_time();
42         bench(_num_loops);
43         t = system_time() - t;
44
45         _mb_per_sec = (_size*(_num_loops/_BYTES_PER_MB))/(t/_NUM_NS_PER_SEC);
46
47         return true;
48     }
49
50     bool canRun() { return !usesNeon() || isNeonSupported(); }
51
52     virtual bool setSize(size_t size) = 0;
53
54     virtual const char *getName() = 0;
55
56     virtual bool verify() = 0;
57
58     virtual bool usesNeon() { return false; }
59
60     bool isNeonSupported() {
61 #if defined(__ARM_NEON__)
62         return true;
63 #else
64         return false;
65 #endif
66     }
67
68     // Accessors/mutators.
69     double mb_per_sec() { return _mb_per_sec; }
70     size_t num_warm_loops() { return _num_warm_loops; }
71     size_t num_loops() { return _num_loops; }
72     size_t size() { return _size; }
73
74     void set_num_warm_loops(size_t num_warm_loops) {
75         _num_warm_loops = num_warm_loops;
76     }
77     void set_num_loops(size_t num_loops) { _num_loops = num_loops; }
78
79     // Static constants
80     static const unsigned int DEFAULT_NUM_WARM_LOOPS = 1000000;
81     static const unsigned int DEFAULT_NUM_LOOPS = 20000000;
82
83 protected:
84     virtual void bench(size_t num_loops) = 0;
85
86     double _mb_per_sec;
87     size_t _size;
88     size_t _num_warm_loops;
89     size_t _num_loops;
90
91 private:
92     // Static constants
93     static const double _NUM_NS_PER_SEC = 1000000000.0;
94     static const double _BYTES_PER_MB = 1024.0* 1024.0;
95 };
96
97 class CopyBandwidthBenchmark : public BandwidthBenchmark {
98 public:
99     CopyBandwidthBenchmark() : BandwidthBenchmark(), _src(NULL), _dst(NULL) { }
100
101     bool setSize(size_t size) {
102         if (_src) {
103            free(_src);
104         }
105         if (_dst) {
106             free(_dst);
107         }
108
109         if (size == 0) {
110             _size = DEFAULT_COPY_SIZE;
111         } else {
112             _size = size;
113         }
114
115         _src = reinterpret_cast<char*>(memalign(64, _size));
116         if (!_src) {
117             perror("Failed to allocate memory for test.");
118             return false;
119         }
120         _dst = reinterpret_cast<char*>(memalign(64, _size));
121         if (!_dst) {
122             perror("Failed to allocate memory for test.");
123             return false;
124         }
125
126         return true;
127     }
128     virtual ~CopyBandwidthBenchmark() {
129         if (_src) {
130             free(_src);
131             _src = NULL;
132         }
133         if (_dst) {
134             free(_dst);
135             _dst = NULL;
136         }
137     }
138
139     bool verify() {
140         memset(_src, 0x23, _size);
141         memset(_dst, 0, _size);
142         bench(1);
143         if (memcmp(_src, _dst, _size) != 0) {
144             printf("Buffers failed to compare after one loop.\n");
145             return false;
146         }
147
148         memset(_src, 0x23, _size);
149         memset(_dst, 0, _size);
150         _num_loops = 2;
151         bench(2);
152         if (memcmp(_src, _dst, _size) != 0) {
153             printf("Buffers failed to compare after two loops.\n");
154             return false;
155         }
156
157         return true;
158     }
159
160 protected:
161     char *_src;
162     char *_dst;
163
164     static const unsigned int DEFAULT_COPY_SIZE = 8000;
165 };
166
167 class CopyLdrdStrdBenchmark : public CopyBandwidthBenchmark {
168 public:
169     CopyLdrdStrdBenchmark() : CopyBandwidthBenchmark() { }
170     virtual ~CopyLdrdStrdBenchmark() {}
171
172     const char *getName() { return "ldrd/strd"; }
173
174 protected:
175     // Copy using ldrd/strd instructions.
176     void bench(size_t num_loops) {
177         asm volatile(
178             "stmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
179
180             "mov r0, %0\n"
181             "mov r1, %1\n"
182             "mov r2, %2\n"
183             "mov r3, %3\n"
184
185             "0:\n"
186             "mov r4, r2, lsr #6\n"
187
188             "1:\n"
189             "ldrd r6, r7, [r0]\n"
190             "strd r6, r7, [r1]\n"
191             "ldrd r6, r7, [r0, #8]\n"
192             "strd r6, r7, [r1, #8]\n"
193             "ldrd r6, r7, [r0, #16]\n"
194             "strd r6, r7, [r1, #16]\n"
195             "ldrd r6, r7, [r0, #24]\n"
196             "strd r6, r7, [r1, #24]\n"
197             "ldrd r6, r7, [r0, #32]\n"
198             "strd r6, r7, [r1, #32]\n"
199             "ldrd r6, r7, [r0, #40]\n"
200             "strd r6, r7, [r1, #40]\n"
201             "ldrd r6, r7, [r0, #48]\n"
202             "strd r6, r7, [r1, #48]\n"
203             "ldrd r6, r7, [r0, #56]\n"
204             "strd r6, r7, [r1, #56]\n"
205
206             "add  r0, r0, #64\n"
207             "add  r1, r1, #64\n"
208             "subs r4, r4, #1\n"
209             "bgt 1b\n"
210
211             "sub r0, r0, r2\n"
212             "sub r1, r1, r2\n"
213             "subs r3, r3, #1\n"
214             "bgt 0b\n"
215
216             "ldmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
217         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
218     }
219 };
220
221 class CopyLdmiaStmiaBenchmark : public CopyBandwidthBenchmark {
222 public:
223     CopyLdmiaStmiaBenchmark() : CopyBandwidthBenchmark() { }
224     virtual ~CopyLdmiaStmiaBenchmark() {}
225
226     const char *getName() { return "ldmia/stmia"; }
227
228 protected:
229     // Copy using ldmia/stmia instructions.
230     void bench(size_t num_loops) {
231         asm volatile(
232             "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
233
234             "mov r0, %0\n"
235             "mov r1, %1\n"
236             "mov r2, %2\n"
237             "mov r3, %3\n"
238
239             "0:\n"
240             "mov r4, r2, lsr #6\n"
241
242             "1:\n"
243             "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
244             "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
245             "subs r4, r4, #1\n"
246             "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
247             "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
248             "bgt 1b\n"
249
250             "sub r0, r0, r2\n"
251             "sub r1, r1, r2\n"
252             "subs r3, r3, #1\n"
253             "bgt 0b\n"
254
255             "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
256         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
257     }
258 };
259
260 class CopyVld1Vst1Benchmark : public CopyBandwidthBenchmark {
261 public:
262     CopyVld1Vst1Benchmark() : CopyBandwidthBenchmark() { }
263     virtual ~CopyVld1Vst1Benchmark() {}
264
265     const char *getName() { return "vld1/vst1"; }
266
267     bool usesNeon() { return true; }
268
269 protected:
270     // Copy using vld1/vst1 instructions.
271     void bench(size_t num_loops) {
272 #if defined(__ARM_NEON__)
273         asm volatile(
274             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
275
276             "mov r0, %0\n"
277             "mov r1, %1\n"
278             "mov r2, %2\n"
279             "mov r3, %3\n"
280
281             "0:\n"
282             "mov r4, r2, lsr #6\n"
283
284             "1:\n"
285             "vld1.8 {d0-d3}, [r0]!\n"
286             "vld1.8 {d4-d7}, [r0]!\n"
287             "subs r4, r4, #1\n"
288             "vst1.8 {d0-d3}, [r1:128]!\n"
289             "vst1.8 {d4-d7}, [r1:128]!\n"
290             "bgt 1b\n"
291
292             "sub r0, r0, r2\n"
293             "sub r1, r1, r2\n"
294             "subs r3, r3, #1\n"
295             "bgt 0b\n"
296
297             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
298         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
299 #endif
300     }
301 };
302
303 class CopyVldrVstrBenchmark : public CopyBandwidthBenchmark {
304 public:
305     CopyVldrVstrBenchmark() : CopyBandwidthBenchmark() { }
306     virtual ~CopyVldrVstrBenchmark() {}
307
308     const char *getName() { return "vldr/vstr"; }
309
310     bool usesNeon() { return true; }
311
312 protected:
313     // Copy using vldr/vstr instructions.
314     void bench(size_t num_loops) {
315 #if defined(__ARM_NEON__)
316         asm volatile(
317             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
318
319             "mov r0, %0\n"
320             "mov r1, %1\n"
321             "mov r2, %2\n"
322             "mov r3, %3\n"
323
324             "0:\n"
325             "mov r4, r2, lsr #6\n"
326
327             "1:\n"
328             "vldr d0, [r0, #0]\n"
329             "subs r4, r4, #1\n"
330             "vldr d1, [r0, #8]\n"
331             "vstr d0, [r1, #0]\n"
332             "vldr d0, [r0, #16]\n"
333             "vstr d1, [r1, #8]\n"
334             "vldr d1, [r0, #24]\n"
335             "vstr d0, [r1, #16]\n"
336             "vldr d0, [r0, #32]\n"
337             "vstr d1, [r1, #24]\n"
338             "vldr d1, [r0, #40]\n"
339             "vstr d0, [r1, #32]\n"
340             "vldr d0, [r0, #48]\n"
341             "vstr d1, [r1, #40]\n"
342             "vldr d1, [r0, #56]\n"
343             "vstr d0, [r1, #48]\n"
344             "add r0, r0, #64\n"
345             "vstr d1, [r1, #56]\n"
346             "add r1, r1, #64\n"
347             "bgt 1b\n"
348
349             "sub r0, r0, r2\n"
350             "sub r1, r1, r2\n"
351             "subs r3, r3, #1\n"
352             "bgt 0b\n"
353
354             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
355         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
356 #endif
357     }
358 };
359
360 class CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark {
361 public:
362     CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { }
363     virtual ~CopyVldmiaVstmiaBenchmark() {}
364
365     const char *getName() { return "vldmia/vstmia"; }
366
367     bool usesNeon() { return true; }
368
369 protected:
370     // Copy using vldmia/vstmia instructions.
371     void bench(size_t num_loops) {
372 #if defined(__ARM_NEON__)
373         asm volatile(
374             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
375
376             "mov r0, %0\n"
377             "mov r1, %1\n"
378             "mov r2, %2\n"
379             "mov r3, %3\n"
380
381             "0:\n"
382             "mov r4, r2, lsr #6\n"
383
384             "1:\n"
385             "vldmia r0!, {d0-d7}\n"
386             "subs r4, r4, #1\n"
387             "vstmia r1!, {d0-d7}\n"
388             "bgt 1b\n"
389
390             "sub r0, r0, r2\n"
391             "sub r1, r1, r2\n"
392             "subs r3, r3, #1\n"
393             "bgt 0b\n"
394
395             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
396         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
397 #endif
398     }
399 };
400
401 class MemcpyBenchmark : public CopyBandwidthBenchmark {
402 public:
403     MemcpyBenchmark() : CopyBandwidthBenchmark() { }
404     virtual ~MemcpyBenchmark() {}
405
406     const char *getName() { return "memcpy"; }
407
408 protected:
409     void bench(size_t num_loops) {
410         for (size_t i = 0; i < num_loops; i++) {
411             memcpy(_dst, _src, _size);
412         }
413     }
414 };
415
416 class SingleBufferBandwidthBenchmark : public BandwidthBenchmark {
417 public:
418     SingleBufferBandwidthBenchmark() : BandwidthBenchmark(), _buffer(NULL) { }
419     virtual ~SingleBufferBandwidthBenchmark() {
420         if (_buffer) {
421             free(_buffer);
422             _buffer = NULL;
423         }
424     }
425
426     bool setSize(size_t size) {
427         if (_buffer) {
428             free(_buffer);
429             _buffer = NULL;
430         }
431
432         if (_size == 0) {
433             _size = DEFAULT_SINGLE_BUFFER_SIZE;
434         } else {
435             _size = size;
436         }
437
438         _buffer = reinterpret_cast<char*>(memalign(64, _size));
439         if (!_buffer) {
440             perror("Failed to allocate memory for test.");
441             return false;
442         }
443         memset(_buffer, 0, _size);
444
445         return true;
446     }
447
448     bool verify() { return true; }
449
450 protected:
451     char *_buffer;
452
453     static const unsigned int DEFAULT_SINGLE_BUFFER_SIZE = 16000;
454 };
455
456 class WriteBandwidthBenchmark : public SingleBufferBandwidthBenchmark {
457 public:
458     WriteBandwidthBenchmark() : SingleBufferBandwidthBenchmark() { }
459     virtual ~WriteBandwidthBenchmark() { }
460
461     bool verify() {
462         memset(_buffer, 0, _size);
463         bench(1);
464         for (size_t i = 0; i < _size; i++) {
465             if (_buffer[i] != 1) {
466                 printf("Buffer failed to compare after one loop.\n");
467                 return false;
468             }
469         }
470
471         memset(_buffer, 0, _size);
472         bench(2);
473         for (size_t i = 0; i < _size; i++) {
474             if (_buffer[i] != 2) {
475                 printf("Buffer failed to compare after two loops.\n");
476                 return false;
477             }
478         }
479
480         return true;
481     }
482 };
483
484 class WriteStrdBenchmark : public WriteBandwidthBenchmark {
485 public:
486     WriteStrdBenchmark() : WriteBandwidthBenchmark() { }
487     virtual ~WriteStrdBenchmark() {}
488
489     const char *getName() { return "strd"; }
490
491 protected:
492     // Write a given value using strd.
493     void bench(size_t num_loops) {
494         asm volatile(
495             "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
496
497             "mov r0, %0\n"
498             "mov r1, %1\n"
499             "mov r2, %2\n"
500
501             "mov r4, #0\n"
502             "mov r5, #0\n"
503
504             "0:\n"
505             "mov r3, r1, lsr #5\n"
506
507             "add r4, r4, #0x01010101\n"
508             "mov r5, r4\n"
509
510             "1:\n"
511             "subs r3, r3, #1\n"
512             "strd r4, r5, [r0]\n"
513             "strd r4, r5, [r0, #8]\n"
514             "strd r4, r5, [r0, #16]\n"
515             "strd r4, r5, [r0, #24]\n"
516             "add  r0, r0, #32\n"
517             "bgt 1b\n"
518
519             "sub r0, r0, r1\n"
520             "subs r2, r2, #1\n"
521             "bgt 0b\n"
522
523             "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
524           :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
525     }
526 };
527
528 class WriteStmiaBenchmark : public WriteBandwidthBenchmark {
529 public:
530     WriteStmiaBenchmark() : WriteBandwidthBenchmark() { }
531     virtual ~WriteStmiaBenchmark() {}
532
533     const char *getName() { return "stmia"; }
534
535 protected:
536       // Write a given value using stmia.
537       void bench(size_t num_loops) {
538           asm volatile(
539               "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
540
541               "mov r0, %0\n"
542               "mov r1, %1\n"
543               "mov r2, %2\n"
544
545               "mov r4, #0\n"
546
547               "0:\n"
548               "mov r3, r1, lsr #5\n"
549
550               "add r4, r4, #0x01010101\n"
551               "mov r5, r4\n"
552               "mov r6, r4\n"
553               "mov r7, r4\n"
554               "mov r8, r4\n"
555               "mov r9, r4\n"
556               "mov r10, r4\n"
557               "mov r11, r4\n"
558
559               "1:\n"
560               "subs r3, r3, #1\n"
561               "stmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
562               "bgt 1b\n"
563
564               "sub r0, r0, r1\n"
565               "subs r2, r2, #1\n"
566               "bgt 0b\n"
567
568               "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
569         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
570     }
571 };
572
573 class WriteVst1Benchmark : public WriteBandwidthBenchmark {
574 public:
575     WriteVst1Benchmark() : WriteBandwidthBenchmark() { }
576     virtual ~WriteVst1Benchmark() {}
577
578     const char *getName() { return "vst1"; }
579
580     bool usesNeon() { return true; }
581
582 protected:
583     // Write a given value using vst.
584     void bench(size_t num_loops) {
585 #if defined(__ARM_NEON__)
586         asm volatile(
587             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
588
589             "mov r0, %0\n"
590             "mov r1, %1\n"
591             "mov r2, %2\n"
592             "mov r4, #0\n"
593
594             "0:\n"
595             "mov r3, r1, lsr #5\n"
596
597             "add r4, r4, #1\n"
598             "vdup.8 d0, r4\n"
599             "vmov d1, d0\n"
600             "vmov d2, d0\n"
601             "vmov d3, d0\n"
602
603             "1:\n"
604             "subs r3, r3, #1\n"
605             "vst1.8 {d0-d3}, [r0:128]!\n"
606             "bgt 1b\n"
607
608             "sub r0, r0, r1\n"
609             "subs r2, r2, #1\n"
610             "bgt 0b\n"
611
612             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
613         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
614 #endif
615     }
616 };
617
618 class WriteVstrBenchmark : public WriteBandwidthBenchmark {
619 public:
620     WriteVstrBenchmark() : WriteBandwidthBenchmark() { }
621     virtual ~WriteVstrBenchmark() {}
622
623     const char *getName() { return "vstr"; }
624
625     bool usesNeon() { return true; }
626
627 protected:
628     // Write a given value using vst.
629     void bench(size_t num_loops) {
630 #if defined(__ARM_NEON__)
631         asm volatile(
632             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
633
634             "mov r0, %0\n"
635             "mov r1, %1\n"
636             "mov r2, %2\n"
637             "mov r4, #0\n"
638
639             "0:\n"
640             "mov r3, r1, lsr #5\n"
641
642             "add r4, r4, #1\n"
643             "vdup.8 d0, r4\n"
644             "vmov d1, d0\n"
645             "vmov d2, d0\n"
646             "vmov d3, d0\n"
647
648             "1:\n"
649             "vstr d0, [r0, #0]\n"
650             "subs r3, r3, #1\n"
651             "vstr d1, [r0, #8]\n"
652             "vstr d0, [r0, #16]\n"
653             "vstr d1, [r0, #24]\n"
654             "add r0, r0, #32\n"
655             "bgt 1b\n"
656
657             "sub r0, r0, r1\n"
658             "subs r2, r2, #1\n"
659             "bgt 0b\n"
660
661             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
662         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
663 #endif
664     }
665 };
666
667 class WriteVstmiaBenchmark : public WriteBandwidthBenchmark {
668 public:
669     WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { }
670     virtual ~WriteVstmiaBenchmark() {}
671
672     const char *getName() { return "vstmia"; }
673
674     bool usesNeon() { return true; }
675
676 protected:
677     // Write a given value using vstmia.
678     void bench(size_t num_loops) {
679 #if defined(__ARM_NEON__)
680         asm volatile(
681             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
682
683             "mov r0, %0\n"
684             "mov r1, %1\n"
685             "mov r2, %2\n"
686             "mov r4, #0\n"
687
688             "0:\n"
689             "mov r3, r1, lsr #5\n"
690
691             "add r4, r4, #1\n"
692             "vdup.8 d0, r4\n"
693             "vmov d1, d0\n"
694             "vmov d2, d0\n"
695             "vmov d3, d0\n"
696
697             "1:\n"
698             "subs r3, r3, #1\n"
699             "vstmia r0!, {d0-d3}\n"
700             "bgt 1b\n"
701
702             "sub r0, r0, r1\n"
703             "subs r2, r2, #1\n"
704             "bgt 0b\n"
705
706             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
707         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
708 #endif
709     }
710 };
711
712 class MemsetBenchmark : public WriteBandwidthBenchmark {
713 public:
714     MemsetBenchmark() : WriteBandwidthBenchmark() { }
715     virtual ~MemsetBenchmark() {}
716
717     const char *getName() { return "memset"; }
718
719 protected:
720     void bench(size_t num_loops) {
721         for (size_t i = 0; i < num_loops; i++) {
722             memset(_buffer, (i % 255) + 1, _size);
723         }
724     }
725 };
726
727 class ReadLdrdBenchmark : public SingleBufferBandwidthBenchmark {
728 public:
729     ReadLdrdBenchmark() : SingleBufferBandwidthBenchmark() { }
730     virtual ~ReadLdrdBenchmark() {}
731
732     const char *getName() { return "ldrd"; }
733
734 protected:
735     // Write a given value using strd.
736     void bench(size_t num_loops) {
737         asm volatile(
738             "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
739
740             "mov r0, %0\n"
741             "mov r1, %1\n"
742             "mov r2, %2\n"
743
744             "0:\n"
745             "mov r3, r1, lsr #5\n"
746
747             "1:\n"
748             "subs r3, r3, #1\n"
749             "ldrd r4, r5, [r0]\n"
750             "ldrd r4, r5, [r0, #8]\n"
751             "ldrd r4, r5, [r0, #16]\n"
752             "ldrd r4, r5, [r0, #24]\n"
753             "add  r0, r0, #32\n"
754             "bgt 1b\n"
755
756             "sub r0, r0, r1\n"
757             "subs r2, r2, #1\n"
758             "bgt 0b\n"
759
760             "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
761           :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
762     }
763 };
764
765 class ReadLdmiaBenchmark : public SingleBufferBandwidthBenchmark {
766 public:
767     ReadLdmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
768     virtual ~ReadLdmiaBenchmark() {}
769
770     const char *getName() { return "ldmia"; }
771
772 protected:
773       // Write a given value using stmia.
774       void bench(size_t num_loops) {
775           asm volatile(
776               "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
777
778               "mov r0, %0\n"
779               "mov r1, %1\n"
780               "mov r2, %2\n"
781
782               "0:\n"
783               "mov r3, r1, lsr #5\n"
784
785               "1:\n"
786               "subs r3, r3, #1\n"
787               "ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
788               "bgt 1b\n"
789
790               "sub r0, r0, r1\n"
791               "subs r2, r2, #1\n"
792               "bgt 0b\n"
793
794               "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
795         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
796     }
797 };
798
799 class ReadVld1Benchmark : public SingleBufferBandwidthBenchmark {
800 public:
801     ReadVld1Benchmark() : SingleBufferBandwidthBenchmark() { }
802     virtual ~ReadVld1Benchmark() {}
803
804     const char *getName() { return "vld1"; }
805
806     bool usesNeon() { return true; }
807
808 protected:
809     // Write a given value using vst.
810     void bench(size_t num_loops) {
811 #if defined(__ARM_NEON__)
812         asm volatile(
813             "stmfd sp!, {r0,r1,r2,r3}\n"
814
815             "mov r0, %0\n"
816             "mov r1, %1\n"
817             "mov r2, %2\n"
818
819             "0:\n"
820             "mov r3, r1, lsr #5\n"
821
822             "1:\n"
823             "subs r3, r3, #1\n"
824             "vld1.8 {d0-d3}, [r0:128]!\n"
825             "bgt 1b\n"
826
827             "sub r0, r0, r1\n"
828             "subs r2, r2, #1\n"
829             "bgt 0b\n"
830
831             "ldmfd sp!, {r0,r1,r2,r3}\n"
832         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
833 #endif
834     }
835 };
836
837 class ReadVldrBenchmark : public SingleBufferBandwidthBenchmark {
838 public:
839     ReadVldrBenchmark() : SingleBufferBandwidthBenchmark() { }
840     virtual ~ReadVldrBenchmark() {}
841
842     const char *getName() { return "vldr"; }
843
844     bool usesNeon() { return true; }
845
846 protected:
847     // Write a given value using vst.
848     void bench(size_t num_loops) {
849 #if defined(__ARM_NEON__)
850         asm volatile(
851             "stmfd sp!, {r0,r1,r2,r3}\n"
852
853             "mov r0, %0\n"
854             "mov r1, %1\n"
855             "mov r2, %2\n"
856
857             "0:\n"
858             "mov r3, r1, lsr #5\n"
859
860             "1:\n"
861             "vldr d0, [r0, #0]\n"
862             "subs r3, r3, #1\n"
863             "vldr d1, [r0, #8]\n"
864             "vldr d0, [r0, #16]\n"
865             "vldr d1, [r0, #24]\n"
866             "add r0, r0, #32\n"
867             "bgt 1b\n"
868
869             "sub r0, r0, r1\n"
870             "subs r2, r2, #1\n"
871             "bgt 0b\n"
872
873             "ldmfd sp!, {r0,r1,r2,r3}\n"
874         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
875 #endif
876     }
877 };
878
879
880 class ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark {
881 public:
882     ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
883     virtual ~ReadVldmiaBenchmark() {}
884
885     const char *getName() { return "vldmia"; }
886
887     bool usesNeon() { return true; }
888
889 protected:
890     // Write a given value using vstmia.
891     void bench(size_t num_loops) {
892 #if defined(__ARM_NEON__)
893         asm volatile(
894             "stmfd sp!, {r0,r1,r2,r3}\n"
895
896             "mov r0, %0\n"
897             "mov r1, %1\n"
898             "mov r2, %2\n"
899
900             "0:\n"
901             "mov r3, r1, lsr #5\n"
902
903             "1:\n"
904             "subs r3, r3, #1\n"
905             "vldmia r0!, {d0-d3}\n"
906             "bgt 1b\n"
907
908             "sub r0, r0, r1\n"
909             "subs r2, r2, #1\n"
910             "bgt 0b\n"
911
912             "ldmfd sp!, {r0,r1,r2,r3}\n"
913         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
914 #endif
915     }
916 };
917
918 #endif  // __BANDWIDTH_H__