OSDN Git Service

Use atomic operations to specify shared memory access order
authorAlexis Hetu <sugoi@google.com>
Wed, 20 Sep 2017 15:24:52 +0000 (11:24 -0400)
committerAlexis Hétu <sugoi@google.com>
Thu, 21 Sep 2017 18:02:59 +0000 (18:02 +0000)
TSAN detected many data race errors in the SwiftShader Renderer
class. x86 has a strong memory ordering model which guarantees
that changes are observed in the same order by other threads.
However, C++ does not provide such guarantees unless specified
using atomic operations. In order to fix these, a new AtomicInt
class was added which is a basically a wrapper class for
std::atomic<int> and which only exposes the portion of the API
required by SwiftShader.

Since std::atomic isn't available on older versions of Android,
a fallback class was implemented without using std::atomic, which
is closer to the previous implementation. Both classes appear to
work properly after performing a few dEQP tests. Both also perform
similarly.

A few minor changes were made in order to attempt to reduce the use
of atomic integer operations when possible.

Change-Id: Ife6d3a2b6113346f8f8163b692e79c2a0e03b22f
Reviewed-on: https://swiftshader-review.googlesource.com/12308
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
src/Common/Thread.hpp
src/Renderer/Clipper.cpp
src/Renderer/Renderer.cpp
src/Renderer/Renderer.hpp
src/Renderer/Surface.hpp

index 87b90d1..7b4e0ca 100644 (file)
 
 #include <stdlib.h>
 
+#if defined(__clang__)
+#define USE_STD_ATOMIC __has_include(<atomic>) // clang has an explicit check for the availability of atomic
+// atomic is available in C++11 or newer, and in Visual Studio 2012 or newer
+#elif (defined(_MSC_VER) && (_MSC_VER >= 1700)) || (__cplusplus >= 201103L)
+#define USE_STD_ATOMIC 1
+#endif
+
+#if USE_STD_ATOMIC
+#include <atomic>
+#endif
+
 namespace sw
 {
        class Event;
@@ -265,7 +276,7 @@ namespace sw
 
        inline int atomicAdd(volatile int* target, int value)
        {
-               #if defined(_MSC_VER)
+               #if defined(_WIN32)
                        return InterlockedExchangeAdd((volatile long*)target, value) + value;
                #else
                        return __sync_add_and_fetch(target, value);
@@ -280,6 +291,46 @@ namespace sw
                        __asm__ __volatile__ ("nop");
                #endif
        }
+
+       #if USE_STD_ATOMIC
+               class AtomicInt
+               {
+               public:
+                       AtomicInt() : ai() {}
+                       AtomicInt(int i) : ai(i) {}
+
+                       inline operator int() const { return ai.load(std::memory_order_acquire); }
+                       inline void operator=(const AtomicInt& i) { ai.store(i.ai.load(std::memory_order_acquire), std::memory_order_release); }
+                       inline void operator=(int i) { ai.store(i, std::memory_order_release); }
+                       inline void operator--() { ai.fetch_sub(1, std::memory_order_acq_rel); }
+                       inline void operator++() { ai.fetch_add(1, std::memory_order_acq_rel); }
+                       inline int operator--(int) { return ai.fetch_sub(1, std::memory_order_acq_rel) - 1; }
+                       inline int operator++(int) { return ai.fetch_add(1, std::memory_order_acq_rel) + 1; }
+                       inline void operator-=(int i) { ai.fetch_sub(i, std::memory_order_acq_rel); }
+                       inline void operator+=(int i) { ai.fetch_add(i, std::memory_order_acq_rel); }
+               private:
+                       std::atomic<int> ai;
+               };
+       #else
+               class AtomicInt
+               {
+               public:
+                       AtomicInt() {}
+                       AtomicInt(int i) : vi(i) {}
+
+                       inline operator int() const { return vi; } // Note: this isn't a guaranteed atomic operation
+                       inline void operator=(const AtomicInt& i) { sw::atomicExchange(&vi, i.vi); }
+                       inline void operator=(int i) { sw::atomicExchange(&vi, i); }
+                       inline void operator--() { sw::atomicDecrement(&vi); }
+                       inline void operator++() { sw::atomicIncrement(&vi); }
+                       inline int operator--(int) { return sw::atomicDecrement(&vi); }
+                       inline int operator++(int) { return sw::atomicIncrement(&vi); }
+                       inline void operator-=(int i) { sw::atomicAdd(&vi, -i); }
+                       inline void operator+=(int i) { sw::atomicAdd(&vi, i); }
+               private:
+                       volatile int vi;
+               };
+       #endif
 }
 
 #endif   // sw_Thread_hpp
index cfd859f..cf3fe3c 100644 (file)
@@ -60,20 +60,21 @@ namespace sw
 
                if(clipFlagsOr & CLIP_USER)
                {
+                       int clipFlags = draw.clipFlags;
                        DrawData &data = *draw.data;
 
                        if(polygon.n >= 3) {
-                       if(draw.clipFlags & CLIP_PLANE0) clipPlane(polygon, data.clipPlane[0]);
+                       if(clipFlags & CLIP_PLANE0) clipPlane(polygon, data.clipPlane[0]);
                        if(polygon.n >= 3) {
-                       if(draw.clipFlags & CLIP_PLANE1) clipPlane(polygon, data.clipPlane[1]);
+                       if(clipFlags & CLIP_PLANE1) clipPlane(polygon, data.clipPlane[1]);
                        if(polygon.n >= 3) {
-                       if(draw.clipFlags & CLIP_PLANE2) clipPlane(polygon, data.clipPlane[2]);
+                       if(clipFlags & CLIP_PLANE2) clipPlane(polygon, data.clipPlane[2]);
                        if(polygon.n >= 3) {
-                       if(draw.clipFlags & CLIP_PLANE3) clipPlane(polygon, data.clipPlane[3]);
+                       if(clipFlags & CLIP_PLANE3) clipPlane(polygon, data.clipPlane[3]);
                        if(polygon.n >= 3) {
-                       if(draw.clipFlags & CLIP_PLANE4) clipPlane(polygon, data.clipPlane[4]);
+                       if(clipFlags & CLIP_PLANE4) clipPlane(polygon, data.clipPlane[4]);
                        if(polygon.n >= 3) {
-                       if(draw.clipFlags & CLIP_PLANE5) clipPlane(polygon, data.clipPlane[5]);
+                       if(clipFlags & CLIP_PLANE5) clipPlane(polygon, data.clipPlane[5]);
                        }}}}}}
                }
 
index 0869697..e918c43 100644 (file)
@@ -314,7 +314,7 @@ namespace sw
                                        Query* q = *query;
                                        if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
                                        {
-                                               atomicIncrement(&(q->reference));
+                                               ++q->reference; // Atomic
                                                draw->queries->push_back(q);
                                        }
                                }
@@ -645,7 +645,7 @@ namespace sw
                        draw->references = (count + batch - 1) / batch;
 
                        schedulerMutex.lock();
-                       nextDraw++;
+                       ++nextDraw; // Atomic
                        schedulerMutex.unlock();
 
                        #ifndef NDEBUG
@@ -771,9 +771,12 @@ namespace sw
                {
                        DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
 
-                       if(draw->primitive >= draw->count)
+                       int primitive = draw->primitive;
+                       int count = draw->count;
+
+                       if(primitive >= count)
                        {
-                               currentDraw++;
+                               ++currentDraw; // Atomic
 
                                if(currentDraw == nextDraw)
                                {
@@ -785,8 +788,8 @@ namespace sw
 
                        if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
                        {
-                               int primitive = draw->primitive;
-                               int count = draw->count;
+                               primitive = draw->primitive;
+                               count = draw->count;
                                int batch = draw->batchSize;
 
                                primitiveProgress[unit].drawCall = currentDraw;
@@ -812,7 +815,9 @@ namespace sw
        {
                schedulerMutex.lock();
 
-               if((int)qSize < threadCount - threadsAwake + 1)
+               int curThreadsAwake = threadsAwake;
+
+               if((int)qSize < threadCount - curThreadsAwake + 1)
                {
                        findAvailableTasks();
                }
@@ -822,9 +827,9 @@ namespace sw
                        task[threadIndex] = taskQueue[(qHead - qSize) % 32];
                        qSize--;
 
-                       if(threadsAwake != threadCount)
+                       if(curThreadsAwake != threadCount)
                        {
-                               int wakeup = qSize - threadsAwake + 1;
+                               int wakeup = qSize - curThreadsAwake + 1;
 
                                for(int i = 0; i < threadCount && wakeup > 0; i++)
                                {
@@ -834,7 +839,7 @@ namespace sw
                                                task[i].type = Task::RESUME;
                                                resume[i]->signal();
 
-                                               threadsAwake++;
+                                               ++threadsAwake; // Atomic
                                                wakeup--;
                                        }
                                }
@@ -844,7 +849,7 @@ namespace sw
                {
                        task[threadIndex].type = Task::SUSPEND;
 
-                       threadsAwake--;
+                       --threadsAwake; // Atomic
                }
 
                schedulerMutex.unlock();
@@ -943,15 +948,15 @@ namespace sw
 
                if(pixelProgress[cluster].processedPrimitives >= draw.count)
                {
-                       pixelProgress[cluster].drawCall++;
+                       ++pixelProgress[cluster].drawCall; // Atomic
                        pixelProgress[cluster].processedPrimitives = 0;
                }
 
-               int ref = atomicDecrement(&primitiveProgress[unit].references);
+               int ref = primitiveProgress[unit].references--; // Atomic
 
                if(ref == 0)
                {
-                       ref = atomicDecrement(&draw.references);
+                       ref = draw.references--; // Atomic
 
                        if(ref == 0)
                        {
@@ -976,17 +981,17 @@ namespace sw
                                                case Query::FRAGMENTS_PASSED:
                                                        for(int cluster = 0; cluster < clusterCount; cluster++)
                                                        {
-                                                               atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
+                                                               query->data += data.occlusion[cluster];
                                                        }
                                                        break;
                                                case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
-                                                       atomicAdd((volatile int*)&query->data, processedPrimitives);
+                                                       query->data += processedPrimitives;
                                                        break;
                                                default:
                                                        break;
                                                }
 
-                                               atomicDecrement(&query->reference);
+                                               --query->reference; // Atomic
                                        }
 
                                        delete draw.queries;
@@ -1069,17 +1074,18 @@ namespace sw
        void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
        {
                Triangle *triangle = triangleBatch[unit];
-               DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
+               int primitiveDrawCall = primitiveProgress[unit].drawCall;
+               DrawCall *draw = drawList[primitiveDrawCall % DRAW_COUNT];
                DrawData *data = draw->data;
                VertexTask *task = vertexTask[thread];
 
                const void *indices = data->indices;
                VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
 
-               if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
+               if(task->vertexCache.drawCall != primitiveDrawCall)
                {
                        task->vertexCache.clear();
-                       task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
+                       task->vertexCache.drawCall = primitiveDrawCall;
                }
 
                unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
index e33f828..4576d23 100644 (file)
@@ -110,8 +110,8 @@ namespace sw
                }
 
                bool building;
-               volatile int reference;
-               volatile unsigned int data;
+               AtomicInt reference;
+               AtomicInt data;
 
                const Type type;
        };
@@ -213,8 +213,8 @@ namespace sw
 
                ~DrawCall();
 
-               DrawType drawType;
-               int batchSize;
+               AtomicInt drawType;
+               AtomicInt batchSize;
 
                Routine *vertexRoutine;
                Routine *setupRoutine;
@@ -247,11 +247,11 @@ namespace sw
 
                std::list<Query*> *queries;
 
-               int clipFlags;
+               AtomicInt clipFlags;
 
-               volatile int primitive;    // Current primitive to enter pipeline
-               volatile int count;        // Number of primitives to render
-               volatile int references;   // Remaining references to this draw call, 0 when done drawing, -1 when resources unlocked and slot is free
+               AtomicInt primitive;    // Current primitive to enter pipeline
+               AtomicInt count;        // Number of primitives to render
+               AtomicInt references;   // Remaining references to this draw call, 0 when done drawing, -1 when resources unlocked and slot is free
 
                DrawData *data;
        };
@@ -279,9 +279,9 @@ namespace sw
                                SUSPEND
                        };
 
-                       volatile Type type;
-                       volatile int primitiveUnit;
-                       volatile int pixelCluster;
+                       AtomicInt type;
+                       AtomicInt primitiveUnit;
+                       AtomicInt pixelCluster;
                };
 
                struct PrimitiveProgress
@@ -295,11 +295,11 @@ namespace sw
                                references = 0;
                        }
 
-                       volatile int drawCall;
-                       volatile int firstPrimitive;
-                       volatile int primitiveCount;
-                       volatile int visible;
-                       volatile int references;
+                       AtomicInt drawCall;
+                       AtomicInt firstPrimitive;
+                       AtomicInt primitiveCount;
+                       AtomicInt visible;
+                       AtomicInt references;
                };
 
                struct PixelProgress
@@ -311,9 +311,9 @@ namespace sw
                                executing = false;
                        }
 
-                       volatile int drawCall;
-                       volatile int processedPrimitives;
-                       volatile bool executing;
+                       AtomicInt drawCall;
+                       AtomicInt processedPrimitives;
+                       AtomicInt executing;
                };
 
        public:
@@ -449,8 +449,8 @@ namespace sw
                Plane clipPlane[MAX_CLIP_PLANES];   // Tranformed to clip space
                bool updateClipPlanes;
 
-               volatile bool exitThreads;
-               volatile int threadsAwake;
+               AtomicInt exitThreads;
+               AtomicInt threadsAwake;
                Thread *worker[16];
                Event *resume[16];         // Events for resuming threads
                Event *suspend[16];        // Events for suspending threads
@@ -464,8 +464,8 @@ namespace sw
                DrawCall *drawCall[DRAW_COUNT];
                DrawCall *drawList[DRAW_COUNT];
 
-               volatile int currentDraw;
-               volatile int nextDraw;
+               AtomicInt currentDraw;
+               AtomicInt nextDraw;
 
                Task taskQueue[32];
                unsigned int qHead;
index 6418c08..8b3b8c5 100644 (file)
@@ -245,7 +245,7 @@ namespace sw
                        int sliceB;
                        int sliceP;
                        Format format;
-                       Lock lock;
+                       AtomicInt lock;
 
                        bool dirty;
                };