OSDN Git Service

gallivm: Introduce lp_format_intrinsic.
author: Jose Fonseca <jfonseca@vmware.com>
Sun, 3 Apr 2016 21:52:53 +0000 (22:52 +0100)
committer: Jose Fonseca <jfonseca@vmware.com>
Sun, 3 Apr 2016 23:06:09 +0000 (00:06 +0100)
Adds a helper for appending .v4f32-like suffixes to intrinsic names, taking
special care of the scalar case, which was often neglected.

This fixes invalid IR when doing mipmap filtering on SSE2 (the only
case where we'd use intrinsics with scalars).

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
src/gallium/auxiliary/gallivm/lp_bld_arit.c
src/gallium/auxiliary/gallivm/lp_bld_intr.c
src/gallium/auxiliary/gallivm/lp_bld_intr.h

index 587c83a..0c43617 100644 (file)
@@ -1493,7 +1493,7 @@ lp_build_abs(struct lp_build_context *bld,
 
    if(type.floating) {
       char intrinsic[32];
-      util_snprintf(intrinsic, sizeof intrinsic, "llvm.fabs.v%uf%u", type.length, type.width);
+      lp_format_intrinsic(intrinsic, sizeof intrinsic, "llvm.fabs", vec_type);
       return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
    }
 
@@ -1880,9 +1880,7 @@ lp_build_round_arch(struct lp_build_context *bld,
          break;
       }
 
-      util_snprintf(intrinsic, sizeof intrinsic, "%s.v%uf%u",
-                    intrinsic_root, type.length, type.width);
-
+      lp_format_intrinsic(intrinsic, sizeof intrinsic, intrinsic_root, bld->vec_type);
       return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
    }
    else /* (util_cpu_caps.has_altivec) */
@@ -2026,7 +2024,7 @@ lp_build_floor(struct lp_build_context *bld,
 
       if (type.width != 32) {
          char intrinsic[32];
-         util_snprintf(intrinsic, sizeof intrinsic, "llvm.floor.v%uf%u", type.length, type.width);
+         lp_format_intrinsic(intrinsic, sizeof intrinsic, "llvm.floor", vec_type);
          return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
       }
 
@@ -2101,7 +2099,7 @@ lp_build_ceil(struct lp_build_context *bld,
 
       if (type.width != 32) {
          char intrinsic[32];
-         util_snprintf(intrinsic, sizeof intrinsic, "llvm.ceil.v%uf%u", type.length, type.width);
+         lp_format_intrinsic(intrinsic, sizeof intrinsic, "llvm.ceil", vec_type);
          return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
       }
 
@@ -2438,15 +2436,8 @@ lp_build_sqrt(struct lp_build_context *bld,
 
    assert(lp_check_value(type, a));
 
-   /* TODO: optimize the constant case */
-
    assert(type.floating);
-   if (type.length == 1) {
-      util_snprintf(intrinsic, sizeof intrinsic, "llvm.sqrt.f%u", type.width);
-   }
-   else {
-      util_snprintf(intrinsic, sizeof intrinsic, "llvm.sqrt.v%uf%u", type.length, type.width);
-   }
+   lp_format_intrinsic(intrinsic, sizeof intrinsic, "llvm.sqrt", vec_type);
 
    return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
 }
index 30f4863..e153389 100644 (file)
@@ -45,6 +45,7 @@
 
 
 #include "util/u_debug.h"
+#include "util/u_string.h"
 
 #include "lp_bld_const.h"
 #include "lp_bld_intr.h"
 #include "lp_bld_pack.h"
 
 
+/**
+ * Format the full name of a type-overloaded LLVM intrinsic.
+ *
+ * Appends a type suffix to name_root and writes the result to name:
+ *  - vector types:  "<name_root>.v<length><c><width>", e.g. "llvm.sqrt.v4f32"
+ *  - scalar types:  "<name_root>.<c><width>",          e.g. "llvm.sqrt.f32"
+ * where <c> is 'i' for integer types and 'f' for float/double types.
+ *
+ * @param name       destination buffer for the formatted name
+ * @param size       size of the destination buffer, passed to util_snprintf
+ * @param name_root  intrinsic name without the type suffix
+ * @param type       scalar or vector LLVM type selecting the suffix; the
+ *                   (element) type must be integer, float or double
+ */
+void
+lp_format_intrinsic(char *name,
+                    size_t size,
+                    const char *name_root,
+                    LLVMTypeRef type)
+{
+   /* length == 0 means "scalar"; it is only set for vector types. */
+   unsigned length = 0;
+   unsigned width = 0;
+   char c = '\0';
+
+   LLVMTypeKind kind = LLVMGetTypeKind(type);
+   if (kind == LLVMVectorTypeKind) {
+      /* Describe the vector by its element count and element type. */
+      length = LLVMGetVectorSize(type);
+      type = LLVMGetElementType(type);
+      kind = LLVMGetTypeKind(type);
+   }
+
+   switch (kind) {
+   case LLVMIntegerTypeKind:
+      c = 'i';
+      width = LLVMGetIntTypeWidth(type);
+      break;
+   case LLVMFloatTypeKind:
+      c = 'f';
+      width = 32;
+      break;
+   case LLVMDoubleTypeKind:
+      c = 'f';
+      width = 64;
+      break;
+   default:
+      /*
+       * Unsupported element type.  Trap in debug builds; when asserts are
+       * compiled out, still leave a well-defined, NUL-terminated string in
+       * name instead of formatting uninitialized values.
+       */
+      assert(0);
+      util_snprintf(name, size, "%s", name_root);
+      return;
+   }
+
+   if (length) {
+      util_snprintf(name, size, "%s.v%u%c%u", name_root, length, c, width);
+   } else {
+      util_snprintf(name, size, "%s.%c%u", name_root, c, width);
+   }
+}
+
+
 LLVMValueRef
 lp_declare_intrinsic(LLVMModuleRef module,
                      const char *name,
index a54b367..7d80ac2 100644 (file)
 #define LP_MAX_FUNC_ARGS 32
 
 
+void
+lp_format_intrinsic(char *name,              /* destination buffer for the suffixed intrinsic name */
+                    size_t size,             /* size of the destination buffer */
+                    const char *name_root,   /* name without type suffix, e.g. "llvm.sqrt" */
+                    LLVMTypeRef type);       /* scalar or vector type that selects the suffix */
+
 LLVMValueRef
 lp_declare_intrinsic(LLVMModuleRef module,
                      const char *name,