From e374d2d24b0d755c9380da0eb33e4151b1ad145f Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Thu, 10 Mar 2016 18:30:40 -0600 Subject: [PATCH] swr: [rasterizer] Discard work + misc fixes --- src/gallium/drivers/swr/rasterizer/core/api.cpp | 40 +++++++++++++++-- src/gallium/drivers/swr/rasterizer/core/api.h | 10 +++++ .../drivers/swr/rasterizer/core/backend.cpp | 12 +++--- src/gallium/drivers/swr/rasterizer/core/backend.h | 2 +- src/gallium/drivers/swr/rasterizer/core/context.h | 12 ++++-- .../drivers/swr/rasterizer/core/frontend.cpp | 50 +++++++++++++++++----- src/gallium/drivers/swr/rasterizer/core/frontend.h | 2 +- .../drivers/swr/rasterizer/core/tilemgr.cpp | 17 +++++++- src/gallium/drivers/swr/rasterizer/core/tilemgr.h | 2 +- 9 files changed, 119 insertions(+), 28 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index e2ea5d934d2..c3c603d294c 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -1265,7 +1265,10 @@ void SwrDrawIndexedInstanced( DrawIndexedInstance(hContext, topology, numIndices, indexOffset, baseVertex, numInstances, startInstance); } -// Attach surfaces to pipeline +////////////////////////////////////////////////////////////////////////// +/// @brief SwrInvalidateTiles +/// @param hContext - Handle passed back from SwrCreateContext +/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to invalidate. void SwrInvalidateTiles( HANDLE hContext, uint32_t attachmentMask) @@ -1273,10 +1276,39 @@ void SwrInvalidateTiles( SWR_CONTEXT *pContext = (SWR_CONTEXT*)hContext; DRAW_CONTEXT* pDC = GetDrawContext(pContext); + pDC->FeWork.type = DISCARDINVALIDATETILES; + pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles; + pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask; + memset(&pDC->FeWork.desc.discardInvalidateTiles.rect, 0, sizeof(SWR_RECT)); + pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_INVALID; + pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = false; + pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = false; + + //enqueue + QueueDraw(pContext); +} + +////////////////////////////////////////////////////////////////////////// +/// @brief SwrDiscardRect +/// @param hContext - Handle passed back from SwrCreateContext +/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard. +/// @param rect - if rect is all zeros, the entire attachment surface will be discarded +void SwrDiscardRect( + HANDLE hContext, + uint32_t attachmentMask, + SWR_RECT rect) +{ + SWR_CONTEXT *pContext = (SWR_CONTEXT*)hContext; + DRAW_CONTEXT* pDC = GetDrawContext(pContext); + // Queue a load to the hottile - pDC->FeWork.type = INVALIDATETILES; - pDC->FeWork.pfnWork = ProcessInvalidateTiles; - pDC->FeWork.desc.invalidateTiles.attachmentMask = attachmentMask; + pDC->FeWork.type = DISCARDINVALIDATETILES; + pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles; + pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask; + pDC->FeWork.desc.discardInvalidateTiles.rect = rect; + pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_RESOLVED; + pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = true; + pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = true; //enqueue QueueDraw(pContext); diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h index 30bafd70c2f..90c2f038c46 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.h +++ b/src/gallium/drivers/swr/rasterizer/core/api.h @@ -409,6 +409,16 @@ void SWR_API SwrInvalidateTiles( uint32_t attachmentMask); ////////////////////////////////////////////////////////////////////////// +/// @brief SwrDiscardRect +/// @param hContext - Handle passed back from SwrCreateContext +/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard. +/// @param rect - if rect is all zeros, the entire attachment surface will be discarded +void SWR_API SwrDiscardRect( + HANDLE hContext, + uint32_t attachmentMask, + SWR_RECT rect); + +////////////////////////////////////////////////////////////////////////// /// @brief SwrDispatch /// @param hContext - Handle passed back from SwrCreateContext /// @param threadGroupCountX - Number of thread groups dispatched in X direction diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp index 2ca549a2a81..7afbb70a383 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp @@ -399,20 +399,22 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile } -void ProcessInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData) +void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData) { - INVALIDATE_TILES_DESC *pDesc = (INVALIDATE_TILES_DESC*)pData; + DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC *)pData; SWR_CONTEXT *pContext = pDC->pContext; + const int numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount); + for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; ++i) { if (pDesc->attachmentMask & (1 << i)) { - HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTileNoLoad(pContext, pDC, macroTile, (SWR_RENDERTARGET_ATTACHMENT)i); + HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTileNoLoad( + pContext, pDC, macroTile, (SWR_RENDERTARGET_ATTACHMENT)i, pDesc->createNewTiles, numSamples); if (pHotTile) { - SWR_ASSERT(pHotTile->state == HOTTILE_INVALID || pHotTile->state == HOTTILE_RESOLVED); - pHotTile->state = HOTTILE_INVALID; + pHotTile->state = (HOTTILE_STATE)pDesc->newTileState; } } } diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.h b/src/gallium/drivers/swr/rasterizer/core/backend.h index 91b8cccf3ac..2fa18953cad 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend.h +++ b/src/gallium/drivers/swr/rasterizer/core/backend.h @@ -37,7 +37,7 @@ void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi void ProcessQueryStatsBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData); void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData); void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData); -void ProcessInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData); +void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData); void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers); void InitClearTilesTable(); simdmask ComputeUserClipMask(uint8_t clipMask, float* pUserClipBuffer, simdscalar vI, simdscalar vJ); diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h index 18c869f176b..ed972fa5478 100644 --- a/src/gallium/drivers/swr/rasterizer/core/context.h +++ b/src/gallium/drivers/swr/rasterizer/core/context.h @@ -114,9 +114,13 @@ struct CLEAR_DESC uint8_t clearStencil; }; -struct INVALIDATE_TILES_DESC +struct DISCARD_INVALIDATE_TILES_DESC { uint32_t attachmentMask; + SWR_RECT rect; + SWR_TILE_STATE newTileState; + bool createNewTiles; + bool fullTilesOnly; }; struct SYNC_DESC @@ -152,7 +156,7 @@ enum WORK_TYPE SYNC, DRAW, CLEAR, - INVALIDATETILES, + DISCARDINVALIDATETILES, STORETILES, QUERYSTATS, }; @@ -166,7 +170,7 @@ struct BE_WORK SYNC_DESC sync; TRIANGLE_WORK_DESC tri; CLEAR_DESC clear; - INVALIDATE_TILES_DESC invalidateTiles; + DISCARD_INVALIDATE_TILES_DESC discardInvalidateTiles; STORE_TILES_DESC storeTiles; QUERY_DESC queryStats; } desc; @@ -203,7 +207,7 @@ struct FE_WORK SYNC_DESC sync; DRAW_WORK draw; CLEAR_DESC clear; - INVALIDATE_TILES_DESC invalidateTiles; + DISCARD_INVALIDATE_TILES_DESC discardInvalidateTiles; STORE_TILES_DESC storeTiles; QUERY_DESC queryStats; } desc; diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 44966a9e9a0..6db36395c86 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -193,35 +193,65 @@ void ProcessStoreTiles( /// @param workerId - thread's worker id. Even thread has a unique id. /// @param pUserData - Pointer to user data passed back to callback. /// @todo This should go away when we switch this to use compute threading. -void ProcessInvalidateTiles( +void ProcessDiscardInvalidateTiles( SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData) { RDTSC_START(FEProcessInvalidateTiles); - INVALIDATE_TILES_DESC *pInv = (INVALIDATE_TILES_DESC*)pUserData; + DISCARD_INVALIDATE_TILES_DESC *pInv = (DISCARD_INVALIDATE_TILES_DESC*)pUserData; MacroTileMgr *pTileMgr = pDC->pTileMgr; - const API_STATE& state = GetApiState(pDC); + SWR_RECT rect; + + if (pInv->rect.top | pInv->rect.bottom | pInv->rect.right | pInv->rect.left) + { + // Valid rect + rect = pInv->rect; + } + else + { + // Use viewport dimensions + const API_STATE& state = GetApiState(pDC); + + rect.left = (uint32_t)state.vp[0].x; + rect.right = (uint32_t)(state.vp[0].x + state.vp[0].width); + rect.top = (uint32_t)state.vp[0].y; + rect.bottom = (uint32_t)(state.vp[0].y + state.vp[0].height); + } // queue a store to each macro tile // compute macro tile bounds for the current render target uint32_t macroWidth = KNOB_MACROTILE_X_DIM; uint32_t macroHeight = KNOB_MACROTILE_Y_DIM; - uint32_t numMacroTilesX = ((uint32_t)state.vp[0].width + (uint32_t)state.vp[0].x + (macroWidth - 1)) / macroWidth; - uint32_t numMacroTilesY = ((uint32_t)state.vp[0].height + (uint32_t)state.vp[0].y + (macroHeight - 1)) / macroHeight; + // Setup region assuming full tiles + uint32_t macroTileStartX = (rect.left + (macroWidth - 1)) / macroWidth; + uint32_t macroTileStartY = (rect.top + (macroHeight - 1)) / macroHeight; + + uint32_t macroTileEndX = rect.right / macroWidth; + uint32_t macroTileEndY = rect.bottom / macroHeight; + + if (pInv->fullTilesOnly == false) + { + // include partial tiles + macroTileStartX = rect.left / macroWidth; + macroTileStartY = rect.top / macroHeight; + + macroTileEndX = (rect.right + macroWidth - 1) / macroWidth; + macroTileEndY = (rect.bottom + macroHeight - 1) / macroHeight; + } // load tiles BE_WORK work; - work.type = INVALIDATETILES; - work.pfnWork = ProcessInvalidateTilesBE; - work.desc.invalidateTiles = *pInv; + work.type = DISCARDINVALIDATETILES; + work.pfnWork = ProcessDiscardInvalidateTilesBE; + work.desc.discardInvalidateTiles = *pInv; - for (uint32_t x = 0; x < numMacroTilesX; ++x) + for (uint32_t x = macroTileStartX; x < macroTileEndX; ++x) { - for (uint32_t y = 0; y < numMacroTilesY; ++y) + for (uint32_t y = macroTileStartY; y < macroTileEndY; ++y) { pTileMgr->enqueue(x, y, &work); } diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h index acb935fc251..9a2f0434db5 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.h +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h @@ -316,7 +316,7 @@ void ProcessDraw(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, vo void ProcessClear(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); void ProcessStoreTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); -void ProcessInvalidateTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); +void ProcessDiscardInvalidateTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); void ProcessSync(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); void ProcessQueryStats(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp index f26903e2608..09cc23e5db7 100644 --- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp @@ -186,7 +186,9 @@ HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32 return &tile.Attachment[attachment]; } -HOTTILE* HotTileMgr::GetHotTileNoLoad(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment) +HOTTILE* HotTileMgr::GetHotTileNoLoad( + SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, + SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples) { uint32_t x, y; MacroTileMgr::getTileIndices(macroID, x, y); @@ -198,7 +200,18 @@ HOTTILE* HotTileMgr::GetHotTileNoLoad(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, HOTTILE& hotTile = tile.Attachment[attachment]; if (hotTile.pBuffer == NULL) { - return NULL; + if (create) + { + uint32_t size = numSamples * mHotTileSize[attachment]; + hotTile.pBuffer = (uint8_t*)_aligned_malloc(size, KNOB_SIMD_WIDTH * 4); + hotTile.state = HOTTILE_INVALID; + hotTile.numSamples = numSamples; + hotTile.renderTargetArrayIndex = 0; + } + else + { + return NULL; + } } return &hotTile; diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h index 22cce0381bc..30f80ce4247 100644 --- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h +++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h @@ -297,7 +297,7 @@ public: HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1, uint32_t renderTargetArrayIndex = 0); - HOTTILE *GetHotTileNoLoad(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment); + HOTTILE *GetHotTileNoLoad(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1); static void ClearColorHotTile(const HOTTILE* pHotTile); static void ClearDepthHotTile(const HOTTILE* pHotTile); -- 2.11.0