/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
24 #include <sys/resource.h>
25 #include <sys/syscall.h>
26 #include <sys/types.h>
// Integer type used for nanosecond timestamps and durations; system_time()
// returns its result as this type.
// NOTE(review): gTime, which startTime()/endTime() read and write, has no
// declaration in the visible text -- confirm it is declared near here.
34 typedef long long nsecs_t;
// Scratch buffer of 1024 * 128 = 131072 floats that the benchmarks below
// read and overwrite in place.
36 float data_f[1024 * 128];
38 static nsecs_t system_time()
41 t.tv_sec = t.tv_nsec = 0;
42 clock_gettime(CLOCK_MONOTONIC, &t);
43 return nsecs_t(t.tv_sec)*1000000000LL + t.tv_nsec;
46 static void startTime()
48 gTime = system_time();
51 static void endTime(const char *str, double ops)
53 nsecs_t t = system_time() - gTime;
54 double ds = ((double)t) / 1e9;
55 printf("Test: %s, %f Mops\n", str, ops / ds / 1e6);
// Scalar multiply-add benchmark over data_f.
// From the visible lines: a first loop over indices 0..1019, then
// 1000 * (1000 / 20) = 50,000 outer passes, each sweeping i = 0..999 and
// overwriting data_f[i] with a weighted sum of nearby elements plus 1.0f,
// and finally a throughput report through endTime().
// NOTE(review): this listing is missing lines (the body of the first loop,
// the middle terms of the sum, the closing braces and presumably a
// startTime() call) -- confirm against the complete file.
59 static void test_mad() {
60 for(int i=0; i<1020; i++) {
// The outer count is divided by 20, presumably the number of floating-point
// operations per inner iteration, so that the total matches the 1e9 passed
// to endTime() -- TODO confirm once the full expression is visible.
68 for (int ct=0; ct < (1000 * (1000 / 20)); ct++) {
69 for (int i=0; i < 1000; i++) {
// In-place update. The visible terms read data_f[i] and data_f[i+9], so
// with i <= 999 the highest index read here is 1008.
70 data_f[i] = (data_f[i] * 0.02f +
79 data_f[i+9] * 0.02f + 1.f);
// Report under the label "scalar mad", stating 1e9 operations were done.
83 endTime("scalar mad", 1e9);
// NEON multiply-accumulate benchmark over data_f, using 4-lane float
// vectors (float32x4_t).
// From the visible lines: a first loop over indices 0..(1020 * 4 - 1), six
// broadcast constants, then 1000 * (1000 / 80) = 12,500 outer passes of
// 1000 inner iterations each, and a throughput report through endTime().
// NOTE(review): this listing is missing lines (the body of the first loop,
// the declaration of t, the closing braces and presumably a startTime()
// call) -- confirm against the complete file.
89 static void test_fma() {
90 for(int i=0; i<1020 * 4; i++) {
// Filter weights, each duplicated into all four lanes of a vector.
93 float32x4_t c0_02 = vdupq_n_f32(0.02f);
94 float32x4_t c0_04 = vdupq_n_f32(0.04f);
95 float32x4_t c0_05 = vdupq_n_f32(0.05f);
96 float32x4_t c0_10 = vdupq_n_f32(0.1f);
97 float32x4_t c0_20 = vdupq_n_f32(0.2f);
98 float32x4_t c1_00 = vdupq_n_f32(1.0f);
// Each inner iteration is 1 vmul + 9 vmla + 1 vadd on four lanes:
// 4 + 9 * 8 + 4 = 80 floating-point operations. 12,500 * 1000 * 80 = 1e9,
// which is the count passed to endTime() below.
104 for (int ct=0; ct < (1000 * (1000 / 80)); ct++) {
105 for (int i=0; i < 1000; i++) {
// Ten 4-float loads at offsets i, i+4, ..., i+36 are combined with the
// symmetric weights 0.02, 0.04, 0.05, 0.1, 0.2, 0.2, 0.1, 0.05, 0.04, 0.02.
// vmlaq_f32(acc, x, w) yields acc + x * w per lane.
// i advances by one float while every load and store covers four, so
// consecutive iterations touch overlapping elements. The highest element
// read is data_f[999 + 36 + 3] = data_f[1038].
107 t = vmulq_f32(vld1q_f32((float32_t *)&data_f[i]), c0_02);
108 t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+4]), c0_04);
109 t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+8]), c0_05);
110 t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+12]), c0_10);
111 t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+16]), c0_20);
112 t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+20]), c0_20);
113 t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+24]), c0_10);
114 t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+28]), c0_05);
115 t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+32]), c0_04);
116 t = vmlaq_f32(t, vld1q_f32((float32_t *)&data_f[i+36]), c0_02);
// Add the 1.0 bias to every lane and write the four results to data_f[i..i+3].
117 t = vaddq_f32(t, c1_00);
118 vst1q_f32((float32_t *)&data_f[i], t);
// Report under the label "neon fma", stating 1e9 operations were done.
122 endTime("neon fma", 1e9);
126 int fp_test(int argc, char** argv) {