#include <stdlib.h>
+#if defined(__clang__)
+#define USE_STD_ATOMIC __has_include(<atomic>) // clang has an explicit check for the availability of atomic
+// atomic is available in C++11 or newer, and in Visual Studio 2012 or newer
+#elif (defined(_MSC_VER) && (_MSC_VER >= 1700)) || (__cplusplus >= 201103L)
+#define USE_STD_ATOMIC 1
+#endif
+
+#if USE_STD_ATOMIC
+#include <atomic>
+#endif
+
namespace sw
{
class Event;
// Atomically adds |value| to *target and returns the UPDATED value.
// Win32: InterlockedExchangeAdd returns the OLD value, hence the "+ value"
// correction. Elsewhere: __sync_add_and_fetch already returns the new value.
inline int atomicAdd(volatile int* target, int value)
{
- #if defined(_MSC_VER)
+ #if defined(_WIN32)
	return InterlockedExchangeAdd((volatile long*)target, value) + value;
#else
	return __sync_add_and_fetch(target, value);
#endif
}
+
+ #if USE_STD_ATOMIC
+ // Thread-safe integer wrapper around std::atomic<int>.
+ // NOTE: unlike the built-in postfix operators, operator--(int) and
+ // operator++(int) return the NEW (post-operation) value — callers such as
+ // "int ref = references--; if(ref == 0)" rely on this convention.
+ class AtomicInt
+ {
+ public:
+ AtomicInt() : ai() {}
+ AtomicInt(int i) : ai(i) {}
+
+ // Acquire load: observes writes released by other threads.
+ inline operator int() const { return ai.load(std::memory_order_acquire); }
+ inline void operator=(const AtomicInt& i) { ai.store(i.ai.load(std::memory_order_acquire), std::memory_order_release); }
+ inline void operator=(int i) { ai.store(i, std::memory_order_release); }
+ inline void operator--() { ai.fetch_sub(1, std::memory_order_acq_rel); }
+ inline void operator++() { ai.fetch_add(1, std::memory_order_acq_rel); }
+ // fetch_sub/fetch_add return the OLD value; adjust to return the new one.
+ inline int operator--(int) { return ai.fetch_sub(1, std::memory_order_acq_rel) - 1; }
+ inline int operator++(int) { return ai.fetch_add(1, std::memory_order_acq_rel) + 1; }
+ inline void operator-=(int i) { ai.fetch_sub(i, std::memory_order_acq_rel); }
+ inline void operator+=(int i) { ai.fetch_add(i, std::memory_order_acq_rel); }
+ private:
+ std::atomic<int> ai;
+ };
+ #else
+ // Fallback AtomicInt for toolchains without <atomic>, built on the
+ // sw::atomic* helpers over a volatile int.
+ // NOTE(review): the default constructor leaves vi uninitialized, whereas the
+ // std::atomic flavor zero-initializes — confirm this asymmetry is intended.
+ // Postfix -- / ++ are assumed to return the NEW value (the convention the
+ // std::atomic flavor implements), presuming sw::atomicDecrement/Increment
+ // return the updated value — TODO confirm against their definitions.
+ class AtomicInt
+ {
+ public:
+ AtomicInt() {}
+ AtomicInt(int i) : vi(i) {}
+
+ inline operator int() const { return vi; } // Note: this isn't a guaranteed atomic operation
+ inline void operator=(const AtomicInt& i) { sw::atomicExchange(&vi, i.vi); }
+ inline void operator=(int i) { sw::atomicExchange(&vi, i); }
+ inline void operator--() { sw::atomicDecrement(&vi); }
+ inline void operator++() { sw::atomicIncrement(&vi); }
+ inline int operator--(int) { return sw::atomicDecrement(&vi); }
+ inline int operator++(int) { return sw::atomicIncrement(&vi); }
+ inline void operator-=(int i) { sw::atomicAdd(&vi, -i); }
+ inline void operator+=(int i) { sw::atomicAdd(&vi, i); }
+ private:
+ volatile int vi;
+ };
+ #endif
}
#endif // sw_Thread_hpp
if(clipFlagsOr & CLIP_USER)
{
+ int clipFlags = draw.clipFlags;
DrawData &data = *draw.data;
if(polygon.n >= 3) {
- if(draw.clipFlags & CLIP_PLANE0) clipPlane(polygon, data.clipPlane[0]);
+ if(clipFlags & CLIP_PLANE0) clipPlane(polygon, data.clipPlane[0]);
if(polygon.n >= 3) {
- if(draw.clipFlags & CLIP_PLANE1) clipPlane(polygon, data.clipPlane[1]);
+ if(clipFlags & CLIP_PLANE1) clipPlane(polygon, data.clipPlane[1]);
if(polygon.n >= 3) {
- if(draw.clipFlags & CLIP_PLANE2) clipPlane(polygon, data.clipPlane[2]);
+ if(clipFlags & CLIP_PLANE2) clipPlane(polygon, data.clipPlane[2]);
if(polygon.n >= 3) {
- if(draw.clipFlags & CLIP_PLANE3) clipPlane(polygon, data.clipPlane[3]);
+ if(clipFlags & CLIP_PLANE3) clipPlane(polygon, data.clipPlane[3]);
if(polygon.n >= 3) {
- if(draw.clipFlags & CLIP_PLANE4) clipPlane(polygon, data.clipPlane[4]);
+ if(clipFlags & CLIP_PLANE4) clipPlane(polygon, data.clipPlane[4]);
if(polygon.n >= 3) {
- if(draw.clipFlags & CLIP_PLANE5) clipPlane(polygon, data.clipPlane[5]);
+ if(clipFlags & CLIP_PLANE5) clipPlane(polygon, data.clipPlane[5]);
}}}}}}
}
Query* q = *query;
if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
{
- atomicIncrement(&(q->reference));
+ ++q->reference; // Atomic
draw->queries->push_back(q);
}
}
draw->references = (count + batch - 1) / batch;
schedulerMutex.lock();
- nextDraw++;
+ ++nextDraw; // Atomic
schedulerMutex.unlock();
#ifndef NDEBUG
{
DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
- if(draw->primitive >= draw->count)
+ int primitive = draw->primitive;
+ int count = draw->count;
+
+ if(primitive >= count)
{
- currentDraw++;
+ ++currentDraw; // Atomic
if(currentDraw == nextDraw)
{
if(!primitiveProgress[unit].references) // Task not already being executed and not still in use by a pixel unit
{
- int primitive = draw->primitive;
- int count = draw->count;
+ primitive = draw->primitive;
+ count = draw->count;
int batch = draw->batchSize;
primitiveProgress[unit].drawCall = currentDraw;
{
schedulerMutex.lock();
- if((int)qSize < threadCount - threadsAwake + 1)
+ int curThreadsAwake = threadsAwake;
+
+ if((int)qSize < threadCount - curThreadsAwake + 1)
{
findAvailableTasks();
}
task[threadIndex] = taskQueue[(qHead - qSize) % 32];
qSize--;
- if(threadsAwake != threadCount)
+ if(curThreadsAwake != threadCount)
{
- int wakeup = qSize - threadsAwake + 1;
+ int wakeup = qSize - curThreadsAwake + 1;
for(int i = 0; i < threadCount && wakeup > 0; i++)
{
task[i].type = Task::RESUME;
resume[i]->signal();
- threadsAwake++;
+ ++threadsAwake; // Atomic
wakeup--;
}
}
{
task[threadIndex].type = Task::SUSPEND;
- threadsAwake--;
+ --threadsAwake; // Atomic
}
schedulerMutex.unlock();
if(pixelProgress[cluster].processedPrimitives >= draw.count)
{
- pixelProgress[cluster].drawCall++;
+ ++pixelProgress[cluster].drawCall; // Atomic
pixelProgress[cluster].processedPrimitives = 0;
}
- int ref = atomicDecrement(&primitiveProgress[unit].references);
+ int ref = primitiveProgress[unit].references--; // Atomic
if(ref == 0)
{
- ref = atomicDecrement(&draw.references);
+ ref = draw.references--; // Atomic
if(ref == 0)
{
case Query::FRAGMENTS_PASSED:
for(int cluster = 0; cluster < clusterCount; cluster++)
{
- atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
+ query->data += data.occlusion[cluster];
}
break;
case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
- atomicAdd((volatile int*)&query->data, processedPrimitives);
+ query->data += processedPrimitives;
break;
default:
break;
}
- atomicDecrement(&query->reference);
+ --query->reference; // Atomic
}
delete draw.queries;
void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
{
Triangle *triangle = triangleBatch[unit];
- DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
+ int primitiveDrawCall = primitiveProgress[unit].drawCall;
+ DrawCall *draw = drawList[primitiveDrawCall % DRAW_COUNT];
DrawData *data = draw->data;
VertexTask *task = vertexTask[thread];
const void *indices = data->indices;
VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
- if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
+ if(task->vertexCache.drawCall != primitiveDrawCall)
{
task->vertexCache.clear();
- task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
+ task->vertexCache.drawCall = primitiveDrawCall;
}
unsigned int batch[128][3]; // FIXME: Adjust to dynamic batch size
}
bool building;
- volatile int reference;
- volatile unsigned int data;
+ AtomicInt reference;
+ AtomicInt data;
const Type type;
};
~DrawCall();
- DrawType drawType;
- int batchSize;
+ AtomicInt drawType;
+ AtomicInt batchSize;
Routine *vertexRoutine;
Routine *setupRoutine;
std::list<Query*> *queries;
- int clipFlags;
+ AtomicInt clipFlags;
- volatile int primitive; // Current primitive to enter pipeline
- volatile int count; // Number of primitives to render
- volatile int references; // Remaining references to this draw call, 0 when done drawing, -1 when resources unlocked and slot is free
+ AtomicInt primitive; // Current primitive to enter pipeline
+ AtomicInt count; // Number of primitives to render
+ AtomicInt references; // Remaining references to this draw call, 0 when done drawing, -1 when resources unlocked and slot is free
DrawData *data;
};
SUSPEND
};
- volatile Type type;
- volatile int primitiveUnit;
- volatile int pixelCluster;
+ AtomicInt type;
+ AtomicInt primitiveUnit;
+ AtomicInt pixelCluster;
};
struct PrimitiveProgress
references = 0;
}
- volatile int drawCall;
- volatile int firstPrimitive;
- volatile int primitiveCount;
- volatile int visible;
- volatile int references;
+ AtomicInt drawCall;
+ AtomicInt firstPrimitive;
+ AtomicInt primitiveCount;
+ AtomicInt visible;
+ AtomicInt references;
};
struct PixelProgress
executing = false;
}
- volatile int drawCall;
- volatile int processedPrimitives;
- volatile bool executing;
+ AtomicInt drawCall;
+ AtomicInt processedPrimitives;
+ AtomicInt executing;
};
public:
Plane clipPlane[MAX_CLIP_PLANES]; // Tranformed to clip space
bool updateClipPlanes;
- volatile bool exitThreads;
- volatile int threadsAwake;
+ AtomicInt exitThreads;
+ AtomicInt threadsAwake;
Thread *worker[16];
Event *resume[16]; // Events for resuming threads
Event *suspend[16]; // Events for suspending threads
DrawCall *drawCall[DRAW_COUNT];
DrawCall *drawList[DRAW_COUNT];
- volatile int currentDraw;
- volatile int nextDraw;
+ AtomicInt currentDraw;
+ AtomicInt nextDraw;
Task taskQueue[32];
unsigned int qHead;
int sliceB;
int sliceP;
Format format;
- Lock lock;
+ AtomicInt lock;
bool dirty;
};