From db61b1844e11f29a06d6dc4aab87ebb4f0aabfdd Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Tue, 8 Dec 2020 10:26:18 -0300
Subject: [PATCH] [compiler-rt] [builtins] Support conversion between fp16 and
 fp128

This patch adds both extendhftf2 and trunctfhf2 to support
conversion between half-precision and quad-precision floating-point
values. They are built iff the compiler supports _Float16.

Some notes on ARM platforms: while fp16 is supported on all
architectures, _Float16 is supported only for 32-bit ARM, 64-bit ARM,
and SPIR (as indicated by clang/docs/LanguageExtensions.rst). Also,
fp16 is a storage format, and 64-bit ARM provides the floating-point
convert-to-half-precision operation as a base ARMv8-A instruction.

This patch does not change the ABI for 32-bit ARM, it will continue
to pass _Float16 as uint16.

This re-enables the change that was reverted by https://reviews.llvm.org/rGb534beabeed3ba1777cd0ff9ce552d077e496726

Differential Revision: https://reviews.llvm.org/D92242
---
 compiler-rt/lib/builtins/CMakeLists.txt           |   2 +
 compiler-rt/lib/builtins/extendhftf2.c            |  23 ++++
 compiler-rt/lib/builtins/trunctfhf2.c             |  23 ++++
 compiler-rt/test/builtins/Unit/extendhftf2_test.c |  95 ++++++++++++++++
 compiler-rt/test/builtins/Unit/trunctfhf2_test.c  | 125 ++++++++++++++++++++++
 5 files changed, 268 insertions(+)
 create mode 100644 compiler-rt/lib/builtins/extendhftf2.c
 create mode 100644 compiler-rt/lib/builtins/trunctfhf2.c
 create mode 100644 compiler-rt/test/builtins/Unit/extendhftf2_test.c
 create mode 100644 compiler-rt/test/builtins/Unit/trunctfhf2_test.c

diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 8bb367e6ce4..d84f4d09e53 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -179,6 +179,7 @@ set(GENERIC_TF_SOURCES
   divtc3.c
   divtf3.c
   extenddftf2.c
+  extendhftf2.c
   extendsftf2.c
   fixtfdi.c
   fixtfsi.c
@@ -197,6 +198,7 @@ set(GENERIC_TF_SOURCES
   powitf2.c
   subtf3.c
   trunctfdf2.c
+  trunctfhf2.c
   trunctfsf2.c
 )
 
diff --git a/compiler-rt/lib/builtins/extendhftf2.c b/compiler-rt/lib/builtins/extendhftf2.c
new file mode 100644
index 00000000000..aefe9737d34
--- /dev/null
+++ b/compiler-rt/lib/builtins/extendhftf2.c
@@ -0,0 +1,23 @@
+//===-- lib/extendhftf2.c - half -> quad conversion ---------------*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) &&                     \
+    defined(COMPILER_RT_HAS_FLOAT16)
+#define SRC_HALF // presumably selects the half source type in the .inc below
+#define DST_QUAD // presumably selects the quad destination type likewise
+#include "fp_extend_impl.inc"
+// Half -> quad entry point; only defined when all three guard macros are set.
+COMPILER_RT_ABI long double __extendhftf2(_Float16 a) {
+  return __extendXfYf2__(a); // not defined here; presumably from the .inc
+}
+
+#endif // CRT_HAS_128BIT && CRT_LDBL_128BIT && COMPILER_RT_HAS_FLOAT16
diff --git a/compiler-rt/lib/builtins/trunctfhf2.c b/compiler-rt/lib/builtins/trunctfhf2.c
new file mode 100644
index 00000000000..e3a2309d954
--- /dev/null
+++ b/compiler-rt/lib/builtins/trunctfhf2.c
@@ -0,0 +1,23 @@
+//===-- lib/trunctfhf2.c - quad -> half conversion ----------------*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) &&                     \
+    defined(COMPILER_RT_HAS_FLOAT16)
+#define SRC_QUAD // presumably selects the quad source type in the .inc below
+#define DST_HALF // presumably selects the half destination type likewise
+#include "fp_trunc_impl.inc"
+// Quad -> half entry point; only defined when all three guard macros are set.
+COMPILER_RT_ABI _Float16 __trunctfhf2(long double a) {
+  return __truncXfYf2__(a); // not defined here; presumably from the .inc
+}
+
+#endif // CRT_HAS_128BIT && CRT_LDBL_128BIT && COMPILER_RT_HAS_FLOAT16
diff --git a/compiler-rt/test/builtins/Unit/extendhftf2_test.c b/compiler-rt/test/builtins/Unit/extendhftf2_test.c
new file mode 100644
index 00000000000..7d3ea3049e8
--- /dev/null
+++ b/compiler-rt/test/builtins/Unit/extendhftf2_test.c
@@ -0,0 +1,95 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+// REQUIRES: librt_has_extendhftf2
+
+#include "int_lib.h"
+#include <stdio.h>
+
+#if __LDBL_MANT_DIG__ == 113 && defined(COMPILER_RT_HAS_FLOAT16)
+
+#include "fp_test.h"
+
+COMPILER_RT_ABI long double __extendhftf2(TYPE_FP16 a);
+
+int test__extendhftf2(TYPE_FP16 a, uint64_t expectedHi, uint64_t expectedLo) {
+  long double x = __extendhftf2(a);
+  int ret = compareResultLD(x, expectedHi, expectedLo);
+
+  if (ret) {
+    printf("error in test__extendhftf2(%#.4x) = %.20Lf, "
+           "expected %.20Lf\n",
+           toRep16(a), x,
+           fromRep128(expectedHi, expectedLo));
+  }
+  return ret;
+}
+
+char assumption_1[sizeof(TYPE_FP16) * CHAR_BIT == 16] = {0};
+
+#endif
+
+int main() { // exit status 1 at the first failing case, 0 otherwise
+#if __LDBL_MANT_DIG__ == 113 && defined(COMPILER_RT_HAS_FLOAT16)
+  // qNaN: expect an all-ones exponent with the top fraction bit set
+  if (test__extendhftf2(makeQNaN16(),
+                        UINT64_C(0x7fff800000000000),
+                        UINT64_C(0x0)))
+    return 1;
+  // NaN from makeNaN16(0x0100): expected fraction is 0x4000..., not 0x8000...
+  if (test__extendhftf2(makeNaN16(UINT16_C(0x0100)),
+                        UINT64_C(0x7fff400000000000),
+                        UINT64_C(0x0)))
+    return 1;
+  // +inf and -inf
+  if (test__extendhftf2(makeInf16(),
+                        UINT64_C(0x7fff000000000000),
+                        UINT64_C(0x0)))
+    return 1;
+  if (test__extendhftf2(-makeInf16(),
+                        UINT64_C(0xffff000000000000),
+                        UINT64_C(0x0)))
+    return 1;
+  // +0 and -0 (the sign bit must survive)
+  if (test__extendhftf2(fromRep16(0x0U),
+                        UINT64_C(0x0), UINT64_C(0x0)))
+    return 1;
+  if (test__extendhftf2(fromRep16(0x8000U),
+                        UINT64_C(0x8000000000000000),
+                        UINT64_C(0x0)))
+    return 1;
+  // half denormals (exponent field 0): 2^-20, 2^-24 and -2^-24
+  if (test__extendhftf2(fromRep16(0x0010U),
+                        UINT64_C(0x3feb000000000000),
+                        UINT64_C(0x0000000000000000)))
+    return 1;
+  if (test__extendhftf2(fromRep16(0x0001U),
+                        UINT64_C(0x3fe7000000000000),
+                        UINT64_C(0x0000000000000000)))
+    return 1;
+  if (test__extendhftf2(fromRep16(0x8001U),
+                        UINT64_C(0xbfe7000000000000),
+                        UINT64_C(0x0000000000000000)))
+    return 1;
+
+  // nearest half to pi (0x4248 = 3.140625) and its negation
+  if (test__extendhftf2(fromRep16(0x4248U),
+                        UINT64_C(0x4000920000000000),
+                        UINT64_C(0x0000000000000000)))
+    return 1;
+  if (test__extendhftf2(fromRep16(0xc248U),
+                        UINT64_C(0xc000920000000000),
+                        UINT64_C(0x0000000000000000)))
+    return 1;
+  // two more normal inputs
+  if (test__extendhftf2(fromRep16(0x508cU),
+                        UINT64_C(0x4004230000000000),
+                        UINT64_C(0x0)))
+    return 1;
+  if (test__extendhftf2(fromRep16(0x1bb7U),
+                        UINT64_C(0x3ff6edc000000000),
+                        UINT64_C(0x0)))
+    return 1;
+#else // long double mantissa is not 113 bits, or COMPILER_RT_HAS_FLOAT16 unset
+  printf("skipped\n");
+#endif
+  return 0;
+}
diff --git a/compiler-rt/test/builtins/Unit/trunctfhf2_test.c b/compiler-rt/test/builtins/Unit/trunctfhf2_test.c
new file mode 100644
index 00000000000..9c8c4afd1b8
--- /dev/null
+++ b/compiler-rt/test/builtins/Unit/trunctfhf2_test.c
@@ -0,0 +1,125 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+// REQUIRES: librt_has_trunctfhf2
+
+#include "int_lib.h"
+#include <stdio.h>
+
+#if __LDBL_MANT_DIG__ == 113 && defined(COMPILER_RT_HAS_FLOAT16)
+
+#include "fp_test.h"
+
+TYPE_FP16 __trunctfhf2(long double a);
+
+int test__trunctfhf2(long double a, uint16_t expected) {
+  TYPE_FP16 x = __trunctfhf2(a);
+  int ret = compareResultH(x, expected);
+
+  if (ret) {
+    printf("error in test__trunctfhf2(%.20Lf) = %#.4x, "
+           "expected %#.4x\n",
+           a, toRep16(x), expected);
+  }
+  return ret;
+}
+
+char assumption_1[sizeof(TYPE_FP16) * CHAR_BIT == 16] = {0};
+
+#endif
+
+int main() { // exit status 1 at the first failing case, 0 otherwise
+#if __LDBL_MANT_DIG__ == 113 && defined(COMPILER_RT_HAS_FLOAT16)
+  // quiet NaN
+  if (test__trunctfhf2(makeQNaN128(),
+                       UINT16_C(0x7e00)))
+    return 1;
+  // NaN built from a payload; the expected value given is the same 0x7e00
+  if (test__trunctfhf2(makeNaN128(UINT64_C(0x810000000000)),
+                       UINT16_C(0x7e00)))
+    return 1;
+  // +inf and -inf
+  if (test__trunctfhf2(makeInf128(),
+                       UINT16_C(0x7c00)))
+    return 1;
+  if (test__trunctfhf2(-makeInf128(),
+                       UINT16_C(0xfc00)))
+    return 1;
+  // +0 and -0 (the sign bit must survive)
+  if (test__trunctfhf2(0.0L, UINT16_C(0x0)))
+    return 1;
+  if (test__trunctfhf2(-0.0L, UINT16_C(0x8000)))
+    return 1;
+  // ordinary values; the p+100 input is far out of range and expects +inf
+  if (test__trunctfhf2(3.1415926535L,
+                       UINT16_C(0x4248)))
+    return 1;
+  if (test__trunctfhf2(-3.1415926535L,
+                       UINT16_C(0xc248)))
+    return 1;
+  if (test__trunctfhf2(0x1.987124876876324p+100L,
+                       UINT16_C(0x7c00)))
+    return 1;
+  if (test__trunctfhf2(0x1.987124876876324p+12L,
+                       UINT16_C(0x6e62)))
+    return 1;
+  if (test__trunctfhf2(0x1.0p+0L,
+                       UINT16_C(0x3c00)))
+    return 1;
+  if (test__trunctfhf2(0x1.0p-14L,
+                       UINT16_C(0x0400)))
+    return 1;
+  // inputs below 2^-14: expected results are half denormals (exponent field 0)
+  if (test__trunctfhf2(0x1.0p-20L,
+                       UINT16_C(0x0010)))
+    return 1;
+  if (test__trunctfhf2(0x1.0p-24L,
+                       UINT16_C(0x0001)))
+    return 1;
+  if (test__trunctfhf2(-0x1.0p-24L,
+                       UINT16_C(0x8001)))
+    return 1;
+  if (test__trunctfhf2(0x1.5p-25L,
+                       UINT16_C(0x0001)))
+    return 1;
+  // 2^-25 is half of the smallest denormal (2^-24): expect signed zero
+  if (test__trunctfhf2(0x1.0p-25L,
+                       UINT16_C(0x0000)))
+    return 1;
+  if (test__trunctfhf2(-0x1.0p-25L,
+                       UINT16_C(0x8000)))
+    return 1;
+  // largest finite half (0x7bff), given exactly
+  if (test__trunctfhf2(65504.0L,
+                       UINT16_C(0x7bff)))
+    return 1;
+  // slightly larger input: still expected to give the largest finite half
+  if (test__trunctfhf2(65519.0L,
+                       UINT16_C(0x7bff)))
+    return 1;
+  // 65520 and above in magnitude: expect infinity of the matching sign
+  if (test__trunctfhf2(65520.0L,
+                       UINT16_C(0x7c00)))
+    return 1;
+  if (test__trunctfhf2(65536.0L,
+                       UINT16_C(0x7c00)))
+    return 1;
+  if (test__trunctfhf2(-65520.0L,
+                       UINT16_C(0xfc00)))
+    return 1;
+  // inputs with long quad mantissas: two in range, one too large, one too small
+  if (test__trunctfhf2(0x1.23a2abb4a2ddee355f36789abcdep+5L,
+                       UINT16_C(0x508f)))
+    return 1;
+  if (test__trunctfhf2(0x1.e3d3c45bd3abfd98b76a54cc321fp-9L,
+                       UINT16_C(0x1b8f)))
+    return 1;
+  if (test__trunctfhf2(0x1.234eebb5faa678f4488693abcdefp+453L,
+                       UINT16_C(0x7c00)))
+    return 1;
+  if (test__trunctfhf2(0x1.edcba9bb8c76a5a43dd21f334634p-43L,
+                       UINT16_C(0x0)))
+    return 1;
+#else // long double mantissa is not 113 bits, or COMPILER_RT_HAS_FLOAT16 unset
+  printf("skipped\n");
+#endif
+  return 0;
+}
-- 
2.11.0