OSDN Git Service

Fix Frac() returning 1.0.
[android-x86/external-swiftshader.git] / src / Reactor / LLVMReactor.cpp
index e1c4c92..d8bda70 100644 (file)
 #include "Memory.hpp"
 #include "MutexLock.hpp"
 
-#include <xmmintrin.h>
 #include <fstream>
 
+#if defined(__i386__) || defined(__x86_64__)
+#include <xmmintrin.h>
+#endif
+
 #if defined(__x86_64__) && defined(_WIN32)
 extern "C" void X86CompilationCallback()
 {
@@ -68,7 +71,7 @@ namespace
        llvm::Module *module = nullptr;
        llvm::Function *function = nullptr;
 
-       sw::BackoffLock codegenMutex;
+       sw::MutexLock codegenMutex;
 }
 
 namespace sw
@@ -451,8 +454,13 @@ namespace sw
                return value;
        }
 
-       Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index)
+       Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
        {
+               if(unsignedIndex && sizeof(void*) == 8)
+               {
+                       index = createZExt(index, Long::getType());
+               }
+
                assert(ptr->getType()->getContainedType(0) == type);
                return V(::builder->CreateGEP(ptr, index));
        }
@@ -2136,6 +2144,14 @@ namespace sw
                return UnpackLow(RValue<Byte8>(byte8), RValue<Byte8>(byte8));
        }
 
+       RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y)
+       {
+               Value *xx = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), x.value, 0);
+               Value *yy = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), y.value, 0);
+
+               return UnpackLow(As<Byte8>(xx), As<Byte8>(yy));
+       }
+
        RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
        {
                if(CPUID::supportsMMX2())
@@ -3017,11 +3033,11 @@ namespace sw
 
                if(!saturate || !CPUID::supportsSSE4_1())
                {
-                       *this = Short4(Int4(int4));
+                       *this = Short4(int4);
                }
                else
                {
-                       *this = As<Short4>(Int2(As<Int4>(x86::packusdw(As<UInt4>(int4), As<UInt4>(int4)))));
+                       *this = As<Short4>(Int2(As<Int4>(x86::packusdw(int4, int4))));
                }
        }
 
@@ -3276,6 +3292,12 @@ namespace sw
                }
        }
 
+       Short8::Short8(short c)
+       {
+               int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
+               storeValue(Nucleus::createConstantVector(constantVector, getType()));
+       }
+
        Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
        {
                int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
@@ -3354,6 +3376,12 @@ namespace sw
                return T(VectorType::get(Short::getType(), 8));
        }
 
+       UShort8::UShort8(unsigned short c)
+       {
+               int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
+               storeValue(Nucleus::createConstantVector(constantVector, getType()));
+       }
+
        UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
        {
                int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
@@ -4875,7 +4903,7 @@ namespace sw
 
                        // Each Short is packed into each Int in the (Short | Short) format.
                        // Shifting by 16 will retrieve the original Short value.
-                       // Shitfing an Int will propagate the sign bit, which will work
+                       // Shifting an Int will propagate the sign bit, which will work
                        // for both positive and negative values of a Short.
                        *this >>= 16;
                }
@@ -5193,7 +5221,7 @@ namespace sw
                else
                {
                        RValue<Int4> greater = CmpNLE(x, y);
-                       return x & greater | y & ~greater;
+                       return (x & greater) | (y & ~greater);
                }
        }
 
@@ -5206,7 +5234,7 @@ namespace sw
                else
                {
                        RValue<Int4> less = CmpLT(x, y);
-                       return x & less | y & ~less;
+                       return (x & less) | (y & ~less);
                }
        }
 
@@ -5533,7 +5561,7 @@ namespace sw
                else
                {
                        RValue<UInt4> greater = CmpNLE(x, y);
-                       return x & greater | y & ~greater;
+                       return (x & greater) | (y & ~greater);
                }
        }
 
@@ -5546,13 +5574,13 @@ namespace sw
                else
                {
                        RValue<UInt4> less = CmpLT(x, y);
-                       return x & less | y & ~less;
+                       return (x & less) | (y & ~less);
                }
        }
 
        RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
        {
-               return x86::packusdw(x, y);   // FIXME: Fallback required
+               return x86::packusdw(As<Int4>(x), As<Int4>(y));
        }
 
        Type *UInt4::getType()
@@ -5709,16 +5737,16 @@ namespace sw
 
        RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
        {
-               if(exactAtPow2)
-               {
-                       // rcpss uses a piecewise-linear approximation which minimizes the relative error
-                       // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
-                       return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
-               }
-               else
-               {
-                       return x86::rcpss(x);
-               }
+               #if defined(__i386__) || defined(__x86_64__)
+                       if(exactAtPow2)
+                       {
+                               // rcpss uses a piecewise-linear approximation which minimizes the relative error
+                               // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
+                               return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
+                       }
+               #endif
+
+               return x86::rcpss(x);
        }
 
        RValue<Float> RcpSqrt_pp(RValue<Float> x)
@@ -6089,16 +6117,16 @@ namespace sw
 
        RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
        {
-               if(exactAtPow2)
-               {
-                       // rcpps uses a piecewise-linear approximation which minimizes the relative error
-                       // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
-                       return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
-               }
-               else
-               {
-                       return x86::rcpps(x);
-               }
+               #if defined(__i386__) || defined(__x86_64__)
+                       if(exactAtPow2)
+                       {
+                               // rcpps uses a piecewise-linear approximation which minimizes the relative error
+                               // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
+                               return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
+                       }
+               #endif
+
+               return x86::rcpps(x);
        }
 
        RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
@@ -6227,16 +6255,22 @@ namespace sw
 
        RValue<Float4> Frac(RValue<Float4> x)
        {
+               Float4 frc;
+
                if(CPUID::supportsSSE4_1())
                {
-                       return x - x86::floorps(x);
+                       frc = x - x86::floorps(x);
                }
                else
                {
-                       Float4 frc = x - Float4(Int4(x));   // Signed fractional part
+                       frc = x - Float4(Int4(x));   // Signed fractional part.
 
-                       return frc + As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1)));
+                       frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f)));   // Add 1.0 if negative.
                }
+
+               // x - floor(x) can be 1.0 for very small negative x.
+               // Clamp against the value just below 1.0.
+               return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
        }
 
        RValue<Float4> Floor(RValue<Float4> x)
@@ -6270,17 +6304,17 @@ namespace sw
 
        RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
        {
-               return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), V(Nucleus::createConstantInt(offset))));
+               return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), V(Nucleus::createConstantInt(offset)), false));
        }
 
        RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
        {
-               return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value));
+               return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, false));
        }
 
        RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
        {
-               return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value));
+               return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, true));
        }
 
        RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, int offset)
@@ -6342,12 +6376,10 @@ namespace sw
                Nucleus::createUnreachable();
        }
 
-       bool branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
+       void branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
        {
                Nucleus::createCondBr(cmp.value, bodyBB, endBB);
                Nucleus::setInsertBlock(bodyBB);
-
-               return true;
        }
 
        RValue<Long> Ticks()
@@ -6888,7 +6920,7 @@ namespace sw
                        return As<Byte8>(V(::builder->CreateCall2(packuswb, As<MMX>(x).value, As<MMX>(y).value)));
                }
 
-               RValue<UShort8> packusdw(RValue<UInt4> x, RValue<UInt4> y)
+               RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
                {
                        if(CPUID::supportsSSE4_1())
                        {
@@ -6898,8 +6930,10 @@ namespace sw
                        }
                        else
                        {
-                               // FIXME: Not an exact replacement!
-                               return As<UShort8>(packssdw(As<Int4>(x - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000)), As<Int4>(y - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000))) + Short8(0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u));
+                               RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000);
+                               RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000);
+
+                               return As<UShort8>(packssdw(bx, by) + Short8(0x8000u));
                        }
                }