Use atomic operations to specify shared memory access order

author Alexis Hetu <sugoi@google.com>

Wed, 20 Sep 2017 15:24:52 +0000 (11:24 -0400)

committer Alexis Hétu <sugoi@google.com>

Thu, 21 Sep 2017 18:02:59 +0000 (18:02 +0000)
author Alexis Hetu <sugoi@google.com>
Wed, 20 Sep 2017 15:24:52 +0000 (11:24 -0400)
committer Alexis Hétu <sugoi@google.com>
Thu, 21 Sep 2017 18:02:59 +0000 (18:02 +0000)
diff --git a/src/Common/Thread.hpp b/src/Common/Thread.hpp

index 87b90d1..7b4e0ca 100644 (file)
--- a/src/Common/Thread.hpp
+++ b/src/Common/Thread.hpp
@@ -30,6 +30,17 @@
  
  #include <stdlib.h>
  
+#if defined(__clang__)
+#define USE_STD_ATOMIC __has_include(<atomic>) // clang has an explicit check for the availability of atomic
+// atomic is available in C++11 or newer, and in Visual Studio 2012 or newer
+#elif (defined(_MSC_VER) && (_MSC_VER >= 1700)) || (__cplusplus >= 201103L)
+#define USE_STD_ATOMIC 1
+#endif
+
+#if USE_STD_ATOMIC
+#include <atomic>
+#endif
+
  namespace sw
  {
         class Event;
@@ -265,7 +276,7 @@ namespace sw
  
         inline int atomicAdd(volatile int* target, int value)
         {
-               #if defined(_MSC_VER)
+               #if defined(_WIN32)
                         return InterlockedExchangeAdd((volatile long*)target, value) + value;
                 #else
                         return __sync_add_and_fetch(target, value);
@@ -280,6 +291,46 @@ namespace sw
                         __asm__ __volatile__ ("nop");
                 #endif
         }
+
+       #if USE_STD_ATOMIC
+               class AtomicInt
+               {
+               public:
+                       AtomicInt() : ai() {}
+                       AtomicInt(int i) : ai(i) {}
+
+                       inline operator int() const { return ai.load(std::memory_order_acquire); }
+                       inline void operator=(const AtomicInt& i) { ai.store(i.ai.load(std::memory_order_acquire), std::memory_order_release); }
+                       inline void operator=(int i) { ai.store(i, std::memory_order_release); }
+                       inline void operator--() { ai.fetch_sub(1, std::memory_order_acq_rel); }
+                       inline void operator++() { ai.fetch_add(1, std::memory_order_acq_rel); }
+                       inline int operator--(int) { return ai.fetch_sub(1, std::memory_order_acq_rel) - 1; }
+                       inline int operator++(int) { return ai.fetch_add(1, std::memory_order_acq_rel) + 1; }
+                       inline void operator-=(int i) { ai.fetch_sub(i, std::memory_order_acq_rel); }
+                       inline void operator+=(int i) { ai.fetch_add(i, std::memory_order_acq_rel); }
+               private:
+                       std::atomic<int> ai;
+               };
+       #else
+               class AtomicInt
+               {
+               public:
+                       AtomicInt() {}
+                       AtomicInt(int i) : vi(i) {}
+
+                       inline operator int() const { return vi; } // Note: this isn't a guaranteed atomic operation
+                       inline void operator=(const AtomicInt& i) { sw::atomicExchange(&vi, i.vi); }
+                       inline void operator=(int i) { sw::atomicExchange(&vi, i); }
+                       inline void operator--() { sw::atomicDecrement(&vi); }
+                       inline void operator++() { sw::atomicIncrement(&vi); }
+                       inline int operator--(int) { return sw::atomicDecrement(&vi); }
+                       inline int operator++(int) { return sw::atomicIncrement(&vi); }
+                       inline void operator-=(int i) { sw::atomicAdd(&vi, -i); }
+                       inline void operator+=(int i) { sw::atomicAdd(&vi, i); }
+               private:
+                       volatile int vi;
+               };
+       #endif
  }
  
  #endif   // sw_Thread_hpp
diff --git a/src/Renderer/Clipper.cpp b/src/Renderer/Clipper.cpp

index cfd859f..cf3fe3c 100644 (file)
--- a/src/Renderer/Clipper.cpp
+++ b/src/Renderer/Clipper.cpp
@@ -60,20 +60,21 @@ namespace sw
  
                 if(clipFlagsOr & CLIP_USER)
                 {
+                       int clipFlags = draw.clipFlags;
                         DrawData &data = *draw.data;
  
                         if(polygon.n >= 3) {
-                       if(draw.clipFlags & CLIP_PLANE0) clipPlane(polygon, data.clipPlane[0]);
+                       if(clipFlags & CLIP_PLANE0) clipPlane(polygon, data.clipPlane[0]);
                         if(polygon.n >= 3) {
-                       if(draw.clipFlags & CLIP_PLANE1) clipPlane(polygon, data.clipPlane[1]);
+                       if(clipFlags & CLIP_PLANE1) clipPlane(polygon, data.clipPlane[1]);
                         if(polygon.n >= 3) {
-                       if(draw.clipFlags & CLIP_PLANE2) clipPlane(polygon, data.clipPlane[2]);
+                       if(clipFlags & CLIP_PLANE2) clipPlane(polygon, data.clipPlane[2]);
                         if(polygon.n >= 3) {
-                       if(draw.clipFlags & CLIP_PLANE3) clipPlane(polygon, data.clipPlane[3]);
+                       if(clipFlags & CLIP_PLANE3) clipPlane(polygon, data.clipPlane[3]);
                         if(polygon.n >= 3) {
-                       if(draw.clipFlags & CLIP_PLANE4) clipPlane(polygon, data.clipPlane[4]);
+                       if(clipFlags & CLIP_PLANE4) clipPlane(polygon, data.clipPlane[4]);
                         if(polygon.n >= 3) {
-                       if(draw.clipFlags & CLIP_PLANE5) clipPlane(polygon, data.clipPlane[5]);
+                       if(clipFlags & CLIP_PLANE5) clipPlane(polygon, data.clipPlane[5]);
                         }}}}}}
                 }
  
diff --git a/src/Renderer/Renderer.cpp b/src/Renderer/Renderer.cpp

index 0869697..e918c43 100644 (file)
--- a/src/Renderer/Renderer.cpp
+++ b/src/Renderer/Renderer.cpp
@@ -314,7 +314,7 @@ namespace sw
                                         Query* q = *query;
                                         if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
                                         {
-                                               atomicIncrement(&(q->reference));
+                                               ++q->reference; // Atomic
                                                 draw->queries->push_back(q);
                                         }
                                 }
@@ -645,7 +645,7 @@ namespace sw
                         draw->references = (count + batch - 1) / batch;
  
                         schedulerMutex.lock();
-                       nextDraw++;
+                       ++nextDraw; // Atomic
                         schedulerMutex.unlock();
  
                         #ifndef NDEBUG
@@ -771,9 +771,12 @@ namespace sw
                 {
                         DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
  
-                       if(draw->primitive >= draw->count)
+                       int primitive = draw->primitive;
+                       int count = draw->count;
+
+                       if(primitive >= count)
                         {
-                               currentDraw++;
+                               ++currentDraw; // Atomic
  
                                 if(currentDraw == nextDraw)
                                 {
@@ -785,8 +788,8 @@ namespace sw
  
                         if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
                         {
-                               int primitive = draw->primitive;
-                               int count = draw->count;
+                               primitive = draw->primitive;
+                               count = draw->count;
                                 int batch = draw->batchSize;
  
                                 primitiveProgress[unit].drawCall = currentDraw;
@@ -812,7 +815,9 @@ namespace sw
         {
                 schedulerMutex.lock();
  
-               if((int)qSize < threadCount - threadsAwake + 1)
+               int curThreadsAwake = threadsAwake;
+
+               if((int)qSize < threadCount - curThreadsAwake + 1)
                 {
                         findAvailableTasks();
                 }
@@ -822,9 +827,9 @@ namespace sw
                         task[threadIndex] = taskQueue[(qHead - qSize) % 32];
                         qSize--;
  
-                       if(threadsAwake != threadCount)
+                       if(curThreadsAwake != threadCount)
                         {
-                               int wakeup = qSize - threadsAwake + 1;
+                               int wakeup = qSize - curThreadsAwake + 1;
  
                                 for(int i = 0; i < threadCount && wakeup > 0; i++)
                                 {
@@ -834,7 +839,7 @@ namespace sw
                                                 task[i].type = Task::RESUME;
                                                 resume[i]->signal();
  
-                                               threadsAwake++;
+                                               ++threadsAwake; // Atomic
                                                 wakeup--;
                                         }
                                 }
@@ -844,7 +849,7 @@ namespace sw
                 {
                         task[threadIndex].type = Task::SUSPEND;
  
-                       threadsAwake--;
+                       --threadsAwake; // Atomic
                 }
  
                 schedulerMutex.unlock();
@@ -943,15 +948,15 @@ namespace sw
  
                 if(pixelProgress[cluster].processedPrimitives >= draw.count)
                 {
-                       pixelProgress[cluster].drawCall++;
+                       ++pixelProgress[cluster].drawCall; // Atomic
                         pixelProgress[cluster].processedPrimitives = 0;
                 }
  
-               int ref = atomicDecrement(&primitiveProgress[unit].references);
+               int ref = primitiveProgress[unit].references--; // Atomic
  
                 if(ref == 0)
                 {
-                       ref = atomicDecrement(&draw.references);
+                       ref = draw.references--; // Atomic
  
                         if(ref == 0)
                         {
@@ -976,17 +981,17 @@ namespace sw
                                                 case Query::FRAGMENTS_PASSED:
                                                         for(int cluster = 0; cluster < clusterCount; cluster++)
                                                         {
-                                                               atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
+                                                               query->data += data.occlusion[cluster];
                                                         }
                                                         break;
                                                 case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
-                                                       atomicAdd((volatile int*)&query->data, processedPrimitives);
+                                                       query->data += processedPrimitives;
                                                         break;
                                                 default:
                                                         break;
                                                 }
  
-                                               atomicDecrement(&query->reference);
+                                               --query->reference; // Atomic
                                         }
  
                                         delete draw.queries;
@@ -1069,17 +1074,18 @@ namespace sw
         void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
         {
                 Triangle *triangle = triangleBatch[unit];
-               DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
+               int primitiveDrawCall = primitiveProgress[unit].drawCall;
+               DrawCall *draw = drawList[primitiveDrawCall % DRAW_COUNT];
                 DrawData *data = draw->data;
                 VertexTask *task = vertexTask[thread];
  
                 const void *indices = data->indices;
                 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
  
-               if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
+               if(task->vertexCache.drawCall != primitiveDrawCall)
                 {
                         task->vertexCache.clear();
-                       task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
+                       task->vertexCache.drawCall = primitiveDrawCall;
                 }
  
                 unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
diff --git a/src/Renderer/Renderer.hpp b/src/Renderer/Renderer.hpp

index e33f828..4576d23 100644 (file)
--- a/src/Renderer/Renderer.hpp
+++ b/src/Renderer/Renderer.hpp
@@ -110,8 +110,8 @@ namespace sw
                 }
  
                 bool building;
-               volatile int reference;
-               volatile unsigned int data;
+               AtomicInt reference;
+               AtomicInt data;
  
                 const Type type;
         };
@@ -213,8 +213,8 @@ namespace sw
  
                 ~DrawCall();
  
-               DrawType drawType;
-               int batchSize;
+               AtomicInt drawType;
+               AtomicInt batchSize;
  
                 Routine *vertexRoutine;
                 Routine *setupRoutine;
@@ -247,11 +247,11 @@ namespace sw
  
                 std::list<Query*> *queries;
  
-               int clipFlags;
+               AtomicInt clipFlags;
  
-               volatile int primitive;    // Current primitive to enter pipeline
-               volatile int count;        // Number of primitives to render
-               volatile int references;   // Remaining references to this draw call, 0 when done drawing, -1 when resources unlocked and slot is free
+               AtomicInt primitive;    // Current primitive to enter pipeline
+               AtomicInt count;        // Number of primitives to render
+               AtomicInt references;   // Remaining references to this draw call, 0 when done drawing, -1 when resources unlocked and slot is free
  
                 DrawData *data;
         };
@@ -279,9 +279,9 @@ namespace sw
                                 SUSPEND
                         };
  
-                       volatile Type type;
-                       volatile int primitiveUnit;
-                       volatile int pixelCluster;
+                       AtomicInt type;
+                       AtomicInt primitiveUnit;
+                       AtomicInt pixelCluster;
                 };
  
                 struct PrimitiveProgress
@@ -295,11 +295,11 @@ namespace sw
                                 references = 0;
                         }
  
-                       volatile int drawCall;
-                       volatile int firstPrimitive;
-                       volatile int primitiveCount;
-                       volatile int visible;
-                       volatile int references;
+                       AtomicInt drawCall;
+                       AtomicInt firstPrimitive;
+                       AtomicInt primitiveCount;
+                       AtomicInt visible;
+                       AtomicInt references;
                 };
  
                 struct PixelProgress
@@ -311,9 +311,9 @@ namespace sw
                                 executing = false;
                         }
  
-                       volatile int drawCall;
-                       volatile int processedPrimitives;
-                       volatile bool executing;
+                       AtomicInt drawCall;
+                       AtomicInt processedPrimitives;
+                       AtomicInt executing;
                 };
  
         public:
@@ -449,8 +449,8 @@ namespace sw
                 Plane clipPlane[MAX_CLIP_PLANES];   // Tranformed to clip space
                 bool updateClipPlanes;
  
-               volatile bool exitThreads;
-               volatile int threadsAwake;
+               AtomicInt exitThreads;
+               AtomicInt threadsAwake;
                 Thread *worker[16];
                 Event *resume[16];         // Events for resuming threads
                 Event *suspend[16];        // Events for suspending threads
@@ -464,8 +464,8 @@ namespace sw
                 DrawCall *drawCall[DRAW_COUNT];
                 DrawCall *drawList[DRAW_COUNT];
  
-               volatile int currentDraw;
-               volatile int nextDraw;
+               AtomicInt currentDraw;
+               AtomicInt nextDraw;
  
                 Task taskQueue[32];
                 unsigned int qHead;
diff --git a/src/Renderer/Surface.hpp b/src/Renderer/Surface.hpp

index 6418c08..8b3b8c5 100644 (file)
--- a/src/Renderer/Surface.hpp
+++ b/src/Renderer/Surface.hpp
@@ -245,7 +245,7 @@ namespace sw
                         int sliceB;
                         int sliceP;
                         Format format;
-                       Lock lock;
+                       AtomicInt lock;
  
                         bool dirty;
                 };
author	Alexis Hetu <sugoi@google.com>
	Wed, 20 Sep 2017 15:24:52 +0000 (11:24 -0400)
committer	Alexis Hétu <sugoi@google.com>
	Thu, 21 Sep 2017 18:02:59 +0000 (18:02 +0000)
src/Common/Thread.hpp		patch \| blob \| history
src/Renderer/Clipper.cpp		patch \| blob \| history
src/Renderer/Renderer.cpp		patch \| blob \| history
src/Renderer/Renderer.hpp		patch \| blob \| history
src/Renderer/Surface.hpp		patch \| blob \| history