OSDN Git Service

Add vldr vstr type benchmarking.
author Christopher Ferris <cferris@google.com>
Tue, 2 Jul 2013 23:38:45 +0000 (16:38 -0700)
committer Christopher Ferris <cferris@google.com>
Wed, 3 Jul 2013 00:45:47 +0000 (17:45 -0700)
Merge from internal master.

(cherry-picked from commit 65d2c7894a93f66ea41d55f843285e7e7bdbf488)

Change-Id: I854b5f310fa44efcaf93af14b84ffe5673cc46c0

tests/memtest/bandwidth.cpp
tests/memtest/bandwidth.h
tests/memtest/memtest.cpp

index 523cabe..cf406e6 100644 (file)
@@ -101,8 +101,10 @@ BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) {
         bench = new CopyLdrdStrdBenchmark();
     } else if (strcmp(name, "copy_ldmia_stmia") == 0) {
         bench = new CopyLdmiaStmiaBenchmark();
-    } else if (strcmp(name, "copy_vld_vst") == 0) {
-        bench = new CopyVldVstBenchmark();
+    } else if (strcmp(name, "copy_vld1_vst1") == 0) {
+        bench = new CopyVld1Vst1Benchmark();
+    } else if (strcmp(name, "copy_vldr_vstr") == 0) {
+        bench = new CopyVldrVstrBenchmark();
     } else if (strcmp(name, "copy_vldmia_vstmia") == 0) {
         bench = new CopyVldmiaVstmiaBenchmark();
     } else if (strcmp(name, "memcpy") == 0) {
@@ -111,8 +113,10 @@ BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) {
         bench = new WriteStrdBenchmark();
     } else if (strcmp(name, "write_stmia") == 0) {
         bench = new WriteStmiaBenchmark();
-    } else if (strcmp(name, "write_vst") == 0) {
-        bench = new WriteVstBenchmark();
+    } else if (strcmp(name, "write_vst1") == 0) {
+        bench = new WriteVst1Benchmark();
+    } else if (strcmp(name, "write_vstr") == 0) {
+        bench = new WriteVstrBenchmark();
     } else if (strcmp(name, "write_vstmia") == 0) {
         bench = new WriteVstmiaBenchmark();
     } else if (strcmp(name, "memset") == 0) {
@@ -121,8 +125,10 @@ BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) {
         bench = new ReadLdrdBenchmark();
     } else if (strcmp(name, "read_ldmia") == 0) {
         bench = new ReadLdmiaBenchmark();
-    } else if (strcmp(name, "read_vld") == 0) {
-        bench = new ReadVldBenchmark();
+    } else if (strcmp(name, "read_vld1") == 0) {
+        bench = new ReadVld1Benchmark();
+    } else if (strcmp(name, "read_vldr") == 0) {
+        bench = new ReadVldrBenchmark();
     } else if (strcmp(name, "read_vldmia") == 0) {
         bench = new ReadVldmiaBenchmark();
     } else {
@@ -418,7 +424,8 @@ int copy_bandwidth(int argc, char** argv) {
     std::vector<BandwidthBenchmark*> bench_objs;
     bench_objs.push_back(new CopyLdrdStrdBenchmark());
     bench_objs.push_back(new CopyLdmiaStmiaBenchmark());
-    bench_objs.push_back(new CopyVldVstBenchmark());
+    bench_objs.push_back(new CopyVld1Vst1Benchmark());
+    bench_objs.push_back(new CopyVldrVstrBenchmark());
     bench_objs.push_back(new CopyVldmiaVstmiaBenchmark());
     bench_objs.push_back(new MemcpyBenchmark());
 
@@ -432,7 +439,8 @@ int write_bandwidth(int argc, char** argv) {
     std::vector<BandwidthBenchmark*> bench_objs;
     bench_objs.push_back(new WriteStrdBenchmark());
     bench_objs.push_back(new WriteStmiaBenchmark());
-    bench_objs.push_back(new WriteVstBenchmark());
+    bench_objs.push_back(new WriteVst1Benchmark());
+    bench_objs.push_back(new WriteVstrBenchmark());
     bench_objs.push_back(new WriteVstmiaBenchmark());
     bench_objs.push_back(new MemsetBenchmark());
 
@@ -447,7 +455,8 @@ int read_bandwidth(int argc, char** argv) {
     std::vector<BandwidthBenchmark*> bench_objs;
     bench_objs.push_back(new ReadLdrdBenchmark());
     bench_objs.push_back(new ReadLdmiaBenchmark());
-    bench_objs.push_back(new ReadVldBenchmark());
+    bench_objs.push_back(new ReadVld1Benchmark());
+    bench_objs.push_back(new ReadVldrBenchmark());
     bench_objs.push_back(new ReadVldmiaBenchmark());
 
     if (!run_bandwidth_benchmark(argc, argv, "read", bench_objs)) {
index b890f80..a09d082 100644 (file)
@@ -141,7 +141,7 @@ public:
         memset(_dst, 0, _size);
         bench(1);
         if (memcmp(_src, _dst, _size) != 0) {
-            printf("Strings failed to compare after one loop.\n");
+            printf("Buffers failed to compare after one loop.\n");
             return false;
         }
 
@@ -150,7 +150,7 @@ public:
         _num_loops = 2;
         bench(2);
         if (memcmp(_src, _dst, _size) != 0) {
-            printf("Strings failed to compare after two loops.\n");
+            printf("Buffers failed to compare after two loops.\n");
             return false;
         }
 
@@ -257,17 +257,17 @@ protected:
     }
 };
 
-class CopyVldVstBenchmark : public CopyBandwidthBenchmark {
+class CopyVld1Vst1Benchmark : public CopyBandwidthBenchmark {
 public:
-    CopyVldVstBenchmark() : CopyBandwidthBenchmark() { }
-    virtual ~CopyVldVstBenchmark() {}
+    CopyVld1Vst1Benchmark() : CopyBandwidthBenchmark() { }
+    virtual ~CopyVld1Vst1Benchmark() {}
 
-    const char *getName() { return "vld/vst"; }
+    const char *getName() { return "vld1/vst1"; }
 
     bool usesNeon() { return true; }
 
 protected:
-    // Copy using vld/vst instructions.
+    // Copy using vld1/vst1 instructions.
     void bench(size_t num_loops) {
 #if defined(__ARM_NEON__)
         asm volatile(
@@ -300,6 +300,63 @@ protected:
     }
 };
 
+class CopyVldrVstrBenchmark : public CopyBandwidthBenchmark {
+public:
+    CopyVldrVstrBenchmark() : CopyBandwidthBenchmark() { }
+    virtual ~CopyVldrVstrBenchmark() {}
+
+    const char *getName() { return "vldr/vstr"; }
+
+    bool usesNeon() { return true; }
+
+protected:
+    // Copy _src to _dst num_loops times using vldr/vstr instructions, 64 bytes per inner-loop iteration.
+    void bench(size_t num_loops) {
+#if defined(__ARM_NEON__)  // Without NEON support the body is empty and nothing is copied.
+        asm volatile(
+            "stmfd sp!, {r0,r1,r2,r3,r4}\n"  // Save the registers used as scratch below.
+
+            "mov r0, %0\n"  // r0 = _src (read pointer)
+            "mov r1, %1\n"  // r1 = _dst (write pointer)
+            "mov r2, %2\n"  // r2 = _size
+            "mov r3, %3\n"  // r3 = num_loops (outer loop counter)
+
+            "0:\n"  // Outer loop: one pass over the buffers.
+            "mov r4, r2, lsr #6\n"  // r4 = _size / 64 = number of 64-byte chunks in one pass.
+
+            "1:\n"  // Inner loop: copy one 64-byte chunk; loads and stores are interleaved, alternating d0 and d1.
+            "vldr d0, [r0, #0]\n"
+            "subs r4, r4, #1\n"  // Decrement the chunk counter; the resulting flags are consumed by "bgt 1b" below.
+            "vldr d1, [r0, #8]\n"
+            "vstr d0, [r1, #0]\n"
+            "vldr d0, [r0, #16]\n"
+            "vstr d1, [r1, #8]\n"
+            "vldr d1, [r0, #24]\n"
+            "vstr d0, [r1, #16]\n"
+            "vldr d0, [r0, #32]\n"
+            "vstr d1, [r1, #24]\n"
+            "vldr d1, [r0, #40]\n"
+            "vstr d0, [r1, #32]\n"
+            "vldr d0, [r0, #48]\n"
+            "vstr d1, [r1, #40]\n"
+            "vldr d1, [r0, #56]\n"
+            "vstr d0, [r1, #48]\n"
+            "add r0, r0, #64\n"  // Advance the read pointer to the next chunk.
+            "vstr d1, [r1, #56]\n"
+            "add r1, r1, #64\n"  // Advance the write pointer to the next chunk.
+            "bgt 1b\n"  // Bottom-tested: the chunk body runs once even when _size / 64 is 0.
+
+            "sub r0, r0, r2\n"  // Rewind the read pointer by _size (equals the bytes advanced only when _size is a multiple of 64).
+            "sub r1, r1, r2\n"  // Rewind the write pointer by _size likewise.
+            "subs r3, r3, #1\n"  // One pass done.
+            "bgt 0b\n"  // Repeat until num_loops passes have run.
+
+            "ldmfd sp!, {r0,r1,r2,r3,r4}\n"  // Restore the saved registers.
+        :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");  // NOTE(review): flags, d0/d1 and the memory at _dst are modified, but "cc", "memory" and the d registers are not listed as clobbers; r4 relies on the stmfd/ldmfd save/restore - confirm this is intended.
+#endif
+    }
+};
+
 class CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark {
 public:
     CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { }
@@ -310,7 +367,7 @@ public:
     bool usesNeon() { return true; }
 
 protected:
-    // Copy using vld/vst instructions.
+    // Copy using vldmia/vstmia instructions.
     void bench(size_t num_loops) {
 #if defined(__ARM_NEON__)
         asm volatile(
@@ -406,7 +463,7 @@ public:
         bench(1);
         for (size_t i = 0; i < _size; i++) {
             if (_buffer[i] != 1) {
-                printf("Strings failed to compare after one loop.\n");
+                printf("Buffer failed to compare after one loop.\n");
                 return false;
             }
         }
@@ -415,7 +472,7 @@ public:
         bench(2);
         for (size_t i = 0; i < _size; i++) {
             if (_buffer[i] != 2) {
-                printf("Strings failed to compare after two loops.\n");
+                printf("Buffer failed to compare after two loops.\n");
                 return false;
             }
         }
@@ -513,12 +570,12 @@ protected:
     }
 };
 
-class WriteVstBenchmark : public WriteBandwidthBenchmark {
+class WriteVst1Benchmark : public WriteBandwidthBenchmark {
 public:
-    WriteVstBenchmark() : WriteBandwidthBenchmark() { }
-    virtual ~WriteVstBenchmark() {}
+    WriteVst1Benchmark() : WriteBandwidthBenchmark() { }
+    virtual ~WriteVst1Benchmark() {}
 
-    const char *getName() { return "vst"; }
+    const char *getName() { return "vst1"; }
 
     bool usesNeon() { return true; }
 
@@ -558,6 +615,55 @@ protected:
     }
 };
 
+class WriteVstrBenchmark : public WriteBandwidthBenchmark {
+public:
+    WriteVstrBenchmark() : WriteBandwidthBenchmark() { }
+    virtual ~WriteVstrBenchmark() {}
+
+    const char *getName() { return "vstr"; }
+
+    bool usesNeon() { return true; }
+
+protected:
+    // Fill _buffer num_loops times using vstr, 32 bytes per inner-loop iteration; pass k (1-based) writes the byte value k.
+    void bench(size_t num_loops) {
+#if defined(__ARM_NEON__)  // Without NEON support the body is empty and nothing is written.
+        asm volatile(
+            "stmfd sp!, {r0,r1,r2,r3,r4}\n"  // Save the registers used as scratch below.
+
+            "mov r0, %0\n"  // r0 = _buffer (write pointer)
+            "mov r1, %1\n"  // r1 = _size
+            "mov r2, %2\n"  // r2 = num_loops (outer loop counter)
+            "mov r4, #0\n"  // r4 = fill value, incremented at the start of every pass.
+
+            "0:\n"  // Outer loop: one pass over the buffer.
+            "mov r3, r1, lsr #5\n"  // r3 = _size / 32 = number of 32-byte chunks in one pass.
+
+            "add r4, r4, #1\n"  // Next fill value (1 on the first pass, 2 on the second, ...).
+            "vdup.8 d0, r4\n"  // Replicate the low byte of r4 into every byte lane of d0.
+            "vmov d1, d0\n"
+            "vmov d2, d0\n"  // NOTE(review): d2 is filled but never stored by the loop below.
+            "vmov d3, d0\n"  // NOTE(review): d3 is filled but never stored by the loop below.
+
+            "1:\n"  // Inner loop: write one 32-byte chunk from d0/d1.
+            "vstr d0, [r0, #0]\n"
+            "subs r3, r3, #1\n"  // Decrement the chunk counter; the resulting flags are consumed by "bgt 1b" below.
+            "vstr d1, [r0, #8]\n"
+            "vstr d0, [r0, #16]\n"
+            "vstr d1, [r0, #24]\n"
+            "add r0, r0, #32\n"  // Advance the write pointer to the next chunk.
+            "bgt 1b\n"  // Bottom-tested: the chunk body runs once even when _size / 32 is 0.
+
+            "sub r0, r0, r1\n"  // Rewind the write pointer by _size (equals the bytes advanced only when _size is a multiple of 32).
+            "subs r2, r2, #1\n"  // One pass done.
+            "bgt 0b\n"  // Repeat until num_loops passes have run.
+
+            "ldmfd sp!, {r0,r1,r2,r3,r4}\n"  // Restore the saved registers.
+        :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");  // NOTE(review): flags, d0-d3 and the memory at _buffer are modified, but "cc", "memory" and the d registers are not listed as clobbers; r3/r4 rely on the stmfd/ldmfd save/restore - confirm this is intended.
+#endif
+    }
+};
+
 class WriteVstmiaBenchmark : public WriteBandwidthBenchmark {
 public:
     WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { }
@@ -690,12 +796,12 @@ protected:
     }
 };
 
-class ReadVldBenchmark : public SingleBufferBandwidthBenchmark {
+class ReadVld1Benchmark : public SingleBufferBandwidthBenchmark {
 public:
-    ReadVldBenchmark() : SingleBufferBandwidthBenchmark() { }
-    virtual ~ReadVldBenchmark() {}
+    ReadVld1Benchmark() : SingleBufferBandwidthBenchmark() { }
+    virtual ~ReadVld1Benchmark() {}
 
-    const char *getName() { return "vld"; }
+    const char *getName() { return "vld1"; }
 
     bool usesNeon() { return true; }
 
@@ -728,6 +834,49 @@ protected:
     }
 };
 
+class ReadVldrBenchmark : public SingleBufferBandwidthBenchmark {
+public:
+    ReadVldrBenchmark() : SingleBufferBandwidthBenchmark() { }
+    virtual ~ReadVldrBenchmark() {}
+
+    const char *getName() { return "vldr"; }
+
+    bool usesNeon() { return true; }
+
+protected:
+    // Read _buffer num_loops times using vldr, 32 bytes per inner-loop iteration; the loaded values are discarded.
+    void bench(size_t num_loops) {
+#if defined(__ARM_NEON__)  // Without NEON support the body is empty and nothing is read.
+        asm volatile(
+            "stmfd sp!, {r0,r1,r2,r3}\n"  // Save the registers used as scratch below.
+
+            "mov r0, %0\n"  // r0 = _buffer (read pointer)
+            "mov r1, %1\n"  // r1 = _size
+            "mov r2, %2\n"  // r2 = num_loops (outer loop counter)
+
+            "0:\n"  // Outer loop: one pass over the buffer.
+            "mov r3, r1, lsr #5\n"  // r3 = _size / 32 = number of 32-byte chunks in one pass.
+
+            "1:\n"  // Inner loop: load one 32-byte chunk into d0/d1, overwriting them each time.
+            "vldr d0, [r0, #0]\n"
+            "subs r3, r3, #1\n"  // Decrement the chunk counter; the resulting flags are consumed by "bgt 1b" below.
+            "vldr d1, [r0, #8]\n"
+            "vldr d0, [r0, #16]\n"
+            "vldr d1, [r0, #24]\n"
+            "add r0, r0, #32\n"  // Advance the read pointer to the next chunk.
+            "bgt 1b\n"  // Bottom-tested: the chunk body runs once even when _size / 32 is 0.
+
+            "sub r0, r0, r1\n"  // Rewind the read pointer by _size (equals the bytes advanced only when _size is a multiple of 32).
+            "subs r2, r2, #1\n"  // One pass done.
+            "bgt 0b\n"  // Repeat until num_loops passes have run.
+
+            "ldmfd sp!, {r0,r1,r2,r3}\n"  // Restore the saved registers.
+        :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");  // NOTE(review): flags and d0/d1 are modified, but "cc" and the d registers are not listed as clobbers; r3 relies on the stmfd/ldmfd save/restore - confirm this is intended.
+#endif
+    }
+};
+
+
 class ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark {
 public:
     ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
index 86ec9a4..7b097d3 100644 (file)
@@ -43,15 +43,15 @@ static void usage(char* p) {
            "  write_bandwidth [--size BYTES_TO_WRITE]\n"
            "  read_bandwidth [--size BYTES_TO_COPY]\n"
            "  per_core_bandwidth [--size BYTES]\n"
-           "    --type copy_ldrd_strd | copy_ldmia_stmia | copy_vld_vst |\n"
-           "           copy_vldmia_vstmia | memcpy | write_strd | write_stmia |\n"
-           "           write_vst | write_vstmia | memset | read_ldrd |\n"
-           "           read_ldmia | read_vld | read_vldmia\n"
+           "    --type copy_ldrd_strd | copy_ldmia_stmia | copy_vld1_vst1 |\n"
+           "           copy_vldr_vstr | copy_vldmia_vstmia | memcpy | write_strd |\n"
+           "           write_stmia | write_vst1 | write_vstr | write_vstmia | memset |\n"
+           "           read_ldrd | read_ldmia | read_vld1 | read_vldr | read_vldmia\n"
            "  multithread_bandwidth [--size BYTES]\n"
-           "    --type copy_ldrd_strd | copy_ldmia_stmia | copy_vld_vst |\n"
-           "           copy_vldmia_vstmia | memcpy | write_strd | write_stmia |\n"
-           "           write_vst | write_vstmia | memset | read_ldrd |\n"
-           "           read_ldmia | read_vld | read_vldmia\n"
+           "    --type copy_ldrd_strd | copy_ldmia_stmia | copy_vld1_vst1 |\n"
+           "           copy_vldr_vstr | copy_vldmia_vstmia | memcpy | write_strd |\n"
+           "           write_stmia | write_vst1 | write_vstr | write_vstmia | memset |\n"
+           "           read_ldrd | read_ldmia | read_vld1 | read_vldr | read_vldmia\n"
            "    --num_threads NUM_THREADS_TO_RUN\n"
            "  malloc [fill]\n"
            "  madvise\n"