#include "Memory.hpp"
#include "MutexLock.hpp"
-#include <xmmintrin.h>
#include <fstream>
+#if defined(__i386__) || defined(__x86_64__)
+#include <xmmintrin.h>
+#endif
+
#if defined(__x86_64__) && defined(_WIN32)
extern "C" void X86CompilationCallback()
{
llvm::Module *module = nullptr;
llvm::Function *function = nullptr;
- sw::BackoffLock codegenMutex;
+ // NOTE(review): lock type switched from sw::BackoffLock to sw::MutexLock —
+ // presumably to block instead of spin/back off while code generation holds
+ // the lock for a long time; confirm against MutexLock.hpp.
+ sw::MutexLock codegenMutex;
}
namespace sw
return value;
}
- Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index)
+ // Emits a GEP: ptr advanced by 'index' elements of 'type'. GEP indices are
+ // interpreted as signed (LLVM LangRef, getelementptr), so on 64-bit targets
+ // an unsigned 32-bit index is zero-extended to 64 bits first, keeping large
+ // unsigned values from being treated as negative offsets.
+ Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
{
+ if(unsignedIndex && sizeof(void*) == 8)
+ {
+ index = createZExt(index, Long::getType());
+ }
+
assert(ptr->getType()->getContainedType(0) == type);
return V(::builder->CreateGEP(ptr, index));
}
return UnpackLow(RValue<Byte8>(byte8), RValue<Byte8>(byte8));
}
+ // Interleaves the bytes of two Byte4 values into a Short4. Each Byte4 is
+ // inserted into lane 0 of a <2 x i32> vector (upper lane left undef),
+ // reinterpreted as Byte8, and the low bytes are interleaved via UnpackLow.
+ RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y)
+ {
+ Value *xx = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), x.value, 0);
+ Value *yy = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), y.value, 0);
+
+ return UnpackLow(As<Byte8>(xx), As<Byte8>(yy));
+ }
+
RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
{
if(CPUID::supportsMMX2())
if(!saturate || !CPUID::supportsSSE4_1())
{
- *this = Short4(Int4(int4));
+ *this = Short4(int4);
}
else
{
- *this = As<Short4>(Int2(As<Int4>(x86::packusdw(As<UInt4>(int4), As<UInt4>(int4)))));
+ *this = As<Short4>(Int2(As<Int4>(x86::packusdw(int4, int4))));
}
}
}
}
+ // Broadcast constructor: replicates 'c' into all eight 16-bit lanes.
+ Short8::Short8(short c)
+ {
+ int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
+ storeValue(Nucleus::createConstantVector(constantVector, getType()));
+ }
+
Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
{
int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
return T(VectorType::get(Short::getType(), 8));
}
+ // Broadcast constructor: replicates 'c' into all eight 16-bit lanes.
+ UShort8::UShort8(unsigned short c)
+ {
+ int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
+ storeValue(Nucleus::createConstantVector(constantVector, getType()));
+ }
+
UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
{
int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
// Each Short is packed into each Int in the (Short | Short) format.
// Shifting by 16 will retrieve the original Short value.
- // Shitfing an Int will propagate the sign bit, which will work
+ // Shifting an Int will propagate the sign bit, which will work
// for both positive and negative values of a Short.
*this >>= 16;
}
else
{
RValue<Int4> greater = CmpNLE(x, y);
- return x & greater | y & ~greater;
+ return (x & greater) | (y & ~greater);
}
}
else
{
RValue<Int4> less = CmpLT(x, y);
- return x & less | y & ~less;
+ return (x & less) | (y & ~less);
}
}
else
{
RValue<UInt4> greater = CmpNLE(x, y);
- return x & greater | y & ~greater;
+ return (x & greater) | (y & ~greater);
}
}
else
{
RValue<UInt4> less = CmpLT(x, y);
- return x & less | y & ~less;
+ return (x & less) | (y & ~less);
}
}
RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
{
- return x86::packusdw(x, y); // FIXME: Fallback required
+ // packusdw now takes signed Int4 operands and carries its own non-SSE4.1
+ // fallback, so the old FIXME no longer applies here.
+ return x86::packusdw(As<Int4>(x), As<Int4>(y));
}
Type *UInt4::getType()
RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
{
- if(exactAtPow2)
- {
- // rcpss uses a piecewise-linear approximation which minimizes the relative error
- // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
- return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
- }
- else
- {
- return x86::rcpss(x);
- }
+ // The correction factor is computed with host <xmmintrin.h> intrinsics
+ // (_mm_rcp_ss / _mm_cvtss_f32), which only exist when building on an x86
+ // host — hence the #if guard. Non-x86 hosts fall through to the
+ // uncorrected approximation even when exactAtPow2 is requested.
+ #if defined(__i386__) || defined(__x86_64__)
+ if(exactAtPow2)
+ {
+ // rcpss uses a piecewise-linear approximation which minimizes the relative error
+ // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
+ return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
+ }
+ #endif
+
+ return x86::rcpss(x);
}
RValue<Float> RcpSqrt_pp(RValue<Float> x)
RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
{
- if(exactAtPow2)
- {
- // rcpps uses a piecewise-linear approximation which minimizes the relative error
- // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
- return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
- }
- else
- {
- return x86::rcpps(x);
- }
+ // As with the scalar overload, the correction factor relies on host
+ // <xmmintrin.h> intrinsics and is therefore compiled only on x86 hosts;
+ // other hosts get the uncorrected rcpps approximation.
+ #if defined(__i386__) || defined(__x86_64__)
+ if(exactAtPow2)
+ {
+ // rcpps uses a piecewise-linear approximation which minimizes the relative error
+ // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
+ return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
+ }
+ #endif
+
+ return x86::rcpps(x);
}
RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
RValue<Float4> Frac(RValue<Float4> x)
{
+ // Per-lane fractional part: x - floor(x), clamped to just below 1.0
+ // because the subtraction can round up to exactly 1.0 for tiny negative x.
+ Float4 frc;
+
if(CPUID::supportsSSE4_1())
{
- return x - x86::floorps(x);
+ frc = x - x86::floorps(x);
}
else
{
- Float4 frc = x - Float4(Int4(x)); // Signed fractional part
+ frc = x - Float4(Int4(x)); // Signed fractional part.
- return frc + As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1)));
+ frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f))); // Add 1.0 if negative.
}
+
+ // x - floor(x) can be 1.0 for very small negative x.
+ // Clamp against the value just below 1.0.
+ return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
}
RValue<Float4> Floor(RValue<Float4> x)
RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
{
- return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), V(Nucleus::createConstantInt(offset))));
+ // Constant byte offset; the index is signed (unsignedIndex = false) so
+ // negative offsets are preserved.
+ return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), V(Nucleus::createConstantInt(offset)), false));
}
RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
{
- return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value));
+ // Runtime Int offset; treated as signed (unsignedIndex = false).
+ return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, false));
}
RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
{
- return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value));
+ // Runtime UInt offset; unsignedIndex = true so createGEP zero-extends it
+ // on 64-bit targets instead of letting it sign-extend.
+ return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, true));
}
RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, int offset)
Nucleus::createUnreachable();
}
- bool branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
+ // Emits a conditional branch (cmp ? bodyBB : endBB) and moves the insert
+ // point to bodyBB. Return type changed to void: the old bool result was
+ // unconditionally true. NOTE(review): confirm no caller consumed it.
+ void branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
{
Nucleus::createCondBr(cmp.value, bodyBB, endBB);
Nucleus::setInsertBlock(bodyBB);
-
- return true;
}
RValue<Long> Ticks()
return As<Byte8>(V(::builder->CreateCall2(packuswb, As<MMX>(x).value, As<MMX>(y).value)));
}
- RValue<UShort8> packusdw(RValue<UInt4> x, RValue<UInt4> y)
+ // Packs signed 32-bit lanes into unsigned 16-bit lanes with saturation
+ // (SSE4.1 packusdw semantics); operands are now signed Int4 accordingly.
+ RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
{
if(CPUID::supportsSSE4_1())
{
}
else
{
- // FIXME: Not an exact replacement!
- return As<UShort8>(packssdw(As<Int4>(x - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000)), As<Int4>(y - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000))) + Short8(0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u));
+ // Exact emulation: x & ~(x >> 31) clamps negative lanes to zero (the
+ // arithmetic shift yields an all-ones mask for negatives), subtracting
+ // 0x8000 biases values into packssdw's signed saturation range, and
+ // adding 0x8000 back (as unsigned) restores the final result.
+ RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000);
+ RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000);
+
+ return As<UShort8>(packssdw(bx, by) + Short8(0x8000u));
}
}