feature: bindless addressing - flush state cache after reusing SS slot
- When a Surface State is reused for a new resource, the State Cache needs to be invalidated.
Related-To: NEO-7063
Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
parent
09ab0ca1d2
commit
69f5ca6345
|
@ -125,6 +125,12 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
|
|||
|
||||
auto neoDevice = this->device->getNEODevice();
|
||||
|
||||
bool stateCacheFlushRequired = neoDevice->getBindlessHeapsHelper() ? neoDevice->getBindlessHeapsHelper()->getStateDirtyForContext(this->csr->getOsContext().getContextId()) : false;
|
||||
if (stateCacheFlushRequired) {
|
||||
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForFullCacheFlush();
|
||||
neoDevice->getBindlessHeapsHelper()->clearStateDirtyForContext(this->csr->getOsContext().getContextId());
|
||||
}
|
||||
|
||||
if (ctx.isDispatchTaskCountPostSyncRequired) {
|
||||
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(neoDevice->getRootDeviceEnvironment(), false);
|
||||
}
|
||||
|
@ -146,6 +152,10 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
|
|||
this->makeSbaTrackingBufferResidentIfL0DebuggerEnabled(ctx.isDebugEnabled);
|
||||
this->makeCsrTagAllocationResident();
|
||||
|
||||
if (stateCacheFlushRequired) {
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addStateCacheFlush(child, neoDevice->getRootDeviceEnvironment());
|
||||
}
|
||||
|
||||
if (ctx.globalInit) {
|
||||
this->getTagsManagerHeapsAndMakeThemResidentIfSWTagsEnabled(child);
|
||||
this->csr->programHardwareContext(child);
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "shared/source/helpers/pause_on_gpu_properties.h"
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/mocks/mock_bindless_heaps_helper.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist/cmdlist.h"
|
||||
|
@ -780,5 +781,59 @@ HWTEST2_F(CmdListLargeGrfTest,
|
|||
testBody<FamilyType>();
|
||||
}
|
||||
|
||||
// Checks that executing command lists on a queue whose OS context is flagged as
// state-cache-dirty in the bindless heaps helper programs a PIPE_CONTROL with
// state cache invalidation, and that the dirty flag is no longer set afterwards.
HWTEST_F(CommandQueueExecuteCommandListsSimpleTest, GivenDirtyFlagForContextInBindlessHelperWhenExecutingCmdListsThenStateCacheInvalidateIsSent) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    // Install a mock bindless heaps helper in the root device environment and
    // keep a raw observer pointer, since ownership moves to the environment.
    auto mockHelperOwner = std::make_unique<MockBindlesHeapsHelper>(neoDevice->getMemoryManager(), neoDevice->getNumGenericSubDevices() > 1, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield());
    MockBindlesHeapsHelper *mockHelper = mockHelperOwner.get();
    neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->bindlessHeapsHelper.reset(mockHelperOwner.release());

    ze_result_t returnValue;
    ze_command_queue_desc_t queueDesc = {};
    queueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;

    auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, false, returnValue));
    ASSERT_NE(nullptr, commandQueue);

    // Flag the queue's OS context so the next submission has to flush the state cache.
    const auto contextId = commandQueue->getCsr()->getOsContext().getContextId();
    mockHelper->stateCacheDirtyForContext.set(contextId);

    auto usedSpaceBefore = commandQueue->commandStream.getUsed();

    ze_command_list_handle_t commandLists[] = {
        CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle(),
        CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()};
    uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]);

    CommandList::fromHandle(commandLists[0])->close();
    CommandList::fromHandle(commandLists[1])->close();

    auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    auto usedSpaceAfter = commandQueue->commandStream.getUsed();
    ASSERT_GT(usedSpaceAfter, usedSpaceBefore);

    // Parse everything the queue wrote to its command stream.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandQueue->commandStream.getCpuBase(), 0), usedSpaceAfter));

    auto pipeControls = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(0u, pipeControls.size());

    // The first PIPE_CONTROL in the stream is expected to be the state cache flush.
    auto firstPipeControl = reinterpret_cast<PIPE_CONTROL *>(*pipeControls.front());
    EXPECT_TRUE(firstPipeControl->getCommandStreamerStallEnable());
    EXPECT_TRUE(firstPipeControl->getStateCacheInvalidationEnable());
    EXPECT_TRUE(firstPipeControl->getTextureCacheInvalidationEnable());
    EXPECT_TRUE(firstPipeControl->getRenderTargetCacheFlushEnable());

    EXPECT_FALSE(mockHelper->getStateDirtyForContext(contextId));

    for (auto handle : commandLists) {
        CommandList::fromHandle(handle)->destroy();
    }

    commandQueue->destroy();
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
|
|
@ -302,9 +302,19 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushImmediateTask(
|
|||
|
||||
handleImmediateFlushJumpToImmediate(flushData);
|
||||
|
||||
bool stateCacheFlushRequired = device.getBindlessHeapsHelper() ? device.getBindlessHeapsHelper()->getStateDirtyForContext(getOsContext().getContextId()) : false;
|
||||
if (stateCacheFlushRequired) {
|
||||
flushData.estimatedSize += MemorySynchronizationCommands<GfxFamily>::getSizeForFullCacheFlush();
|
||||
}
|
||||
|
||||
auto &csrCommandStream = getCS(flushData.estimatedSize);
|
||||
flushData.csrStartOffset = csrCommandStream.getUsed();
|
||||
|
||||
if (stateCacheFlushRequired) {
|
||||
device.getBindlessHeapsHelper()->clearStateDirtyForContext(getOsContext().getContextId());
|
||||
MemorySynchronizationCommands<GfxFamily>::addStateCacheFlush(csrCommandStream, device.getRootDeviceEnvironment());
|
||||
}
|
||||
|
||||
dispatchImmediateFlushPipelineSelectCommand(flushData, csrCommandStream);
|
||||
dispatchImmediateFlushFrontEndCommand(flushData, device, csrCommandStream);
|
||||
dispatchImmediateFlushStateComputeModeCommand(flushData, csrCommandStream);
|
||||
|
@ -468,7 +478,13 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
|||
|
||||
handleFrontEndStateTransition(dispatchFlags);
|
||||
|
||||
auto &commandStreamCSR = this->getCS(getRequiredCmdStreamSizeAligned(dispatchFlags, device));
|
||||
auto estimatedSize = getRequiredCmdStreamSizeAligned(dispatchFlags, device);
|
||||
|
||||
bool stateCacheFlushRequired = device.getBindlessHeapsHelper() ? device.getBindlessHeapsHelper()->getStateDirtyForContext(getOsContext().getContextId()) : false;
|
||||
if (stateCacheFlushRequired) {
|
||||
estimatedSize += MemorySynchronizationCommands<GfxFamily>::getSizeForFullCacheFlush();
|
||||
}
|
||||
auto &commandStreamCSR = this->getCS(estimatedSize);
|
||||
auto commandStreamStartCSR = commandStreamCSR.getUsed();
|
||||
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies, false);
|
||||
|
@ -517,6 +533,11 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
|||
experimentalCmdBuffer->injectBufferStart<GfxFamily>(commandStreamCSR, startingOffset);
|
||||
}
|
||||
|
||||
if (stateCacheFlushRequired) {
|
||||
device.getBindlessHeapsHelper()->clearStateDirtyForContext(getOsContext().getContextId());
|
||||
MemorySynchronizationCommands<GfxFamily>::addStateCacheFlush(commandStreamCSR, device.getRootDeviceEnvironment());
|
||||
}
|
||||
|
||||
if (requiresInstructionCacheFlush) {
|
||||
PipeControlArgs args;
|
||||
args.instructionCacheInvalidateEnable = true;
|
||||
|
|
|
@ -433,6 +433,7 @@ struct MemorySynchronizationCommands {
|
|||
|
||||
static void addFullCacheFlush(LinearStream &commandStream, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
static void setCacheFlushExtraProperties(PipeControlArgs &args);
|
||||
// Programs one PIPE_CONTROL into commandStream with command streamer stall, render target cache flush,
// state cache invalidation and texture cache invalidation enabled.
static void addStateCacheFlush(LinearStream &commandStream, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
|
||||
static size_t getSizeForBarrierWithPostSyncOperation(const RootDeviceEnvironment &rootDeviceEnvironment, bool tlbInvalidationRequired);
|
||||
static size_t getSizeForBarrierWa(const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
|
|
|
@ -512,6 +512,20 @@ void MemorySynchronizationCommands<GfxFamily>::addFullCacheFlush(LinearStream &c
|
|||
MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(commandStream, args);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void MemorySynchronizationCommands<GfxFamily>::addStateCacheFlush(LinearStream &commandStream, const RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
|
||||
PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl;
|
||||
cmd.setCommandStreamerStallEnable(true);
|
||||
cmd.setRenderTargetCacheFlushEnable(true);
|
||||
cmd.setStateCacheInvalidationEnable(true);
|
||||
cmd.setTextureCacheInvalidationEnable(true);
|
||||
|
||||
auto commandsBuffer = commandStream.getSpace(sizeof(PIPE_CONTROL));
|
||||
*reinterpret_cast<PIPE_CONTROL *>(commandsBuffer) = cmd;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
const StackVec<size_t, 3> GfxCoreHelperHw<GfxFamily>::getDeviceSubGroupSizes() const {
|
||||
return {8, 16, 32};
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#include "shared/test/common/helpers/gtest_helpers.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/mocks/mock_allocation_properties.h"
|
||||
#include "shared/test/common/mocks/mock_bindless_heaps_helper.h"
|
||||
#include "shared/test/common/mocks/mock_csr.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/mocks/mock_driver_model.h"
|
||||
|
@ -4489,3 +4490,81 @@ HWTEST_F(CommandStreamReceiverTest, givenCsrWhenCleanUpResourcesThenOwnedPrivate
|
|||
csr.cleanupResources();
|
||||
EXPECT_EQ(mapForReuse->size(), 0u);
|
||||
}
|
||||
|
||||
// Checks that flushTask programs a PIPE_CONTROL with state cache invalidation
// when the bindless heaps helper reports the CSR's OS context as dirty, and
// that the dirty flag is no longer set afterwards.
HWTEST_F(CommandStreamReceiverHwTest, GivenDirtyFlagForContextInBindlessHelperWhenFlushTaskCalledThenStateCacheInvalidateIsSent) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // Hand a mock helper to the root device environment; keep a raw observer pointer.
    auto mockHelperOwner = std::make_unique<MockBindlesHeapsHelper>(pDevice->getMemoryManager(), pDevice->getNumGenericSubDevices() > 1, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    MockBindlesHeapsHelper *mockHelper = mockHelperOwner.get();
    pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->bindlessHeapsHelper.reset(mockHelperOwner.release());

    const auto contextId = csr.getOsContext().getContextId();
    mockHelper->stateCacheDirtyForContext.set(contextId);

    flushTaskFlags.implicitFlush = true;
    auto usedSpaceBefore = csr.commandStream.getUsed();

    csr.flushTask(commandStream,
                  0,
                  &dsh,
                  &ioh,
                  nullptr,
                  taskLevel,
                  flushTaskFlags,
                  *pDevice);

    auto usedSpaceAfter = csr.commandStream.getUsed();
    ASSERT_GT(usedSpaceAfter, usedSpaceBefore);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(csr.commandStream.getCpuBase(), 0), usedSpaceAfter));

    auto pipeControls = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(0u, pipeControls.size());

    // The CSR stream may hold several PIPE_CONTROLs; accept any one that has
    // all four expected bits set.
    bool stateCacheFlushFound = false;
    for (const auto &cmdIt : pipeControls) {
        auto candidate = reinterpret_cast<PIPE_CONTROL *>(*cmdIt);
        if (candidate->getCommandStreamerStallEnable() &&
            candidate->getStateCacheInvalidationEnable() &&
            candidate->getTextureCacheInvalidationEnable() &&
            candidate->getRenderTargetCacheFlushEnable()) {
            stateCacheFlushFound = true;
            break;
        }
    }
    EXPECT_TRUE(stateCacheFlushFound);
    EXPECT_FALSE(mockHelper->getStateDirtyForContext(contextId));
}
|
||||
|
||||
// Checks that flushImmediateTask programs a PIPE_CONTROL with state cache
// invalidation when the bindless heaps helper reports the CSR's OS context as
// dirty, and that the dirty flag is no longer set afterwards.
HWTEST_F(CommandStreamReceiverHwTest, GivenDirtyFlagForContextInBindlessHelperWhenFlushImmediateTaskCalledThenStateCacheInvalidateIsSent) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // Hand a mock helper to the root device environment; keep a raw observer pointer.
    auto mockHelperOwner = std::make_unique<MockBindlesHeapsHelper>(pDevice->getMemoryManager(), pDevice->getNumGenericSubDevices() > 1, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    MockBindlesHeapsHelper *mockHelper = mockHelperOwner.get();
    pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->bindlessHeapsHelper.reset(mockHelperOwner.release());

    const auto contextId = csr.getOsContext().getContextId();
    mockHelper->stateCacheDirtyForContext.set(contextId);

    this->requiredStreamProperties.stateComputeMode.setPropertiesAll(false, GrfConfig::DefaultGrfNumber, ThreadArbitrationPolicy::AgeBased, NEO::PreemptionMode::ThreadGroup);

    csr.flushImmediateTask(commandStream, commandStream.getUsed(), immediateFlushTaskFlags, *pDevice);

    // Parse the CSR stream from offset 0 and inspect the PIPE_CONTROL returned by the parser.
    HardwareParse csrStreamParser;
    csrStreamParser.parseCommands<FamilyType>(csr.commandStream, 0);
    auto pipeControl = csrStreamParser.getCommand<PIPE_CONTROL>();
    ASSERT_NE(nullptr, pipeControl);

    EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
    EXPECT_TRUE(pipeControl->getStateCacheInvalidationEnable());
    EXPECT_TRUE(pipeControl->getTextureCacheInvalidationEnable());
    EXPECT_TRUE(pipeControl->getRenderTargetCacheFlushEnable());

    EXPECT_FALSE(mockHelper->getStateDirtyForContext(contextId));
}
|
||||
|
|
Loading…
Reference in New Issue