OSDN Git Service

[CUDA] Fix order of memcpy arguments in __shfl_*(<64-bit type>).
authorArtem Belevich <tra@google.com>
Wed, 22 Jan 2020 23:12:49 +0000 (15:12 -0800)
committerArtem Belevich <tra@google.com>
Thu, 23 Jan 2020 21:17:52 +0000 (13:17 -0800)
Wrong argument order resulted in broken shfl ops for 64-bit types.

clang/lib/Headers/__clang_cuda_intrinsics.h

index b67461a..c7bff6a 100644 (file)
@@ -45,7 +45,7 @@
     _Static_assert(sizeof(__val) == sizeof(__Bits));                           \
     _Static_assert(sizeof(__Bits) == 2 * sizeof(int));                         \
     __Bits __tmp;                                                              \
-    memcpy(&__val, &__tmp, sizeof(__val));                                     \
+    memcpy(&__tmp, &__val, sizeof(__val));                                \
     __tmp.__a = ::__FnName(__tmp.__a, __offset, __width);                      \
     __tmp.__b = ::__FnName(__tmp.__b, __offset, __width);                      \
     long long __ret;                                                           \
@@ -129,7 +129,7 @@ __MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f,
     _Static_assert(sizeof(__val) == sizeof(__Bits));                           \
     _Static_assert(sizeof(__Bits) == 2 * sizeof(int));                         \
     __Bits __tmp;                                                              \
-    memcpy(&__val, &__tmp, sizeof(__val));                                     \
+    memcpy(&__tmp, &__val, sizeof(__val));                                     \
     __tmp.__a = ::__FnName(__mask, __tmp.__a, __offset, __width);              \
     __tmp.__b = ::__FnName(__mask, __tmp.__b, __offset, __width);              \
     long long __ret;                                                           \