Flush state caches after command list is destroyed

When state base address tracking is enabled and a command list uses private
heaps, the command list must, at destroy time, call all compute CSRs that were
using that heap to invalidate their state caches.
This allows a new command list to reuse the same heap allocation for different
surface states, because the cached states are invalidated before the new use.

Related-To: NEO-5055

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2023-03-27 21:37:18 +00:00
committed by Compute-Runtime-Automation
parent 820a189c52
commit 6437c1a91e
16 changed files with 185 additions and 5 deletions

View File

@@ -11,10 +11,13 @@
#include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/linear_stream.h"
#include "shared/source/command_stream/wait_status.h" #include "shared/source/command_stream/wait_status.h"
#include "shared/source/device/device.h" #include "shared/source/device/device.h"
#include "shared/source/helpers/engine_control.h"
#include "shared/source/helpers/engine_node_helper.h" #include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/logical_state_helper.h" #include "shared/source/helpers/logical_state_helper.h"
#include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/indirect_heap/indirect_heap.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/source/os_interface/sys_calls_common.h" #include "shared/source/os_interface/sys_calls_common.h"
#include "level_zero/core/source/cmdqueue/cmdqueue.h" #include "level_zero/core/source/cmdqueue/cmdqueue.h"
@@ -44,6 +47,27 @@ ze_result_t CommandListImp::destroy() {
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout; auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
this->csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, this->csr->peekTaskCount()); this->csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, this->csr->peekTaskCount());
} }
if (this->cmdListType == CommandListType::TYPE_REGULAR &&
!isCopyOnly() &&
this->stateBaseAddressTracking &&
this->cmdListHeapAddressModel == NEO::HeapAddressModel::PrivateHeaps) {
auto memoryManager = device->getNEODevice()->getMemoryManager();
auto heapAllocation = this->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getGraphicsAllocation();
for (auto &engine : memoryManager->getRegisteredEngines()) {
if (NEO::EngineHelpers::isComputeEngine(engine.getEngineType())) {
auto contextId = engine.osContext->getContextId();
if (heapAllocation->isUsedByOsContext(contextId)) {
engine.commandStreamReceiver->sendRenderStateCacheFlush();
engine.commandStreamReceiver->waitForCompletionWithTimeout(NEO::WaitParams{false, false, NEO::TimeoutControls::maxTimeout}, engine.commandStreamReceiver->peekTaskCount());
}
}
}
}
delete this; delete this;
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
} }

View File

@@ -113,6 +113,8 @@ void ModuleMutableCommandListFixture::setUpImpl() {
} }
void ModuleMutableCommandListFixture::setUp(uint32_t revision) { void ModuleMutableCommandListFixture::setUp(uint32_t revision) {
backupHwInfo = std::make_unique<VariableBackup<HardwareInfo>>(defaultHwInfo.get());
defaultHwInfo->capabilityTable.blitterOperationsSupported = true;
if (revision != 0) { if (revision != 0) {
DebugManager.flags.OverrideRevision.set(revision); DebugManager.flags.OverrideRevision.set(revision);
} }

View File

@@ -79,6 +79,7 @@ struct ModuleMutableCommandListFixture : public ModuleImmutableDataFixture {
std::unique_ptr<L0::ult::CommandList> commandList; std::unique_ptr<L0::ult::CommandList> commandList;
std::unique_ptr<L0::ult::CommandList> commandListImmediate; std::unique_ptr<L0::ult::CommandList> commandListImmediate;
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel; std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
std::unique_ptr<VariableBackup<HardwareInfo>> backupHwInfo;
L0::ult::CommandQueue *commandQueue; L0::ult::CommandQueue *commandQueue;
NEO::EngineGroupType engineGroupType; NEO::EngineGroupType engineGroupType;

View File

@@ -2558,5 +2558,31 @@ HWTEST2_F(CommandListStateBaseAddressPrivateHeapTest,
ASSERT_EQ(0u, sbaCmds.size()); ASSERT_EQ(0u, sbaCmds.size());
} }
// Creates a command list, appends one kernel launch, executes it on the command queue and
// destroys it, then expects the CSR command stream to contain a PIPE_CONTROL that carries the
// state cache flush flags.
HWTEST2_F(CommandListStateBaseAddressPrivateHeapTest,
          givenCommandListUsingPrivateSurfaceHeapWhenCommandListDestroyedThenCsrDispatchesStateCacheFlush,
          IsAtLeastSkl) {
    auto &csr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
    auto &csrStream = csr.commandStream;

    ze_result_t returnValue;
    L0::ult::CommandList *cmdListObject = whiteboxCast(CommandList::create(productFamily, device, engineGroupType, 0u, returnValue));
    // Everything below dereferences cmdListObject, so abort the test if creation failed.
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
    ASSERT_NE(nullptr, cmdListObject);

    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};
    // Do not silently ignore a failed append: the flush expectation is meaningless without it.
    returnValue = cmdListObject->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    returnValue = cmdListObject->close();
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    auto cmdListHandle = cmdListObject->toHandle();
    returnValue = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, true);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    returnValue = cmdListObject->destroy();
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::findStateCacheFlushPipeControl(csrStream));
}
} // namespace ult } // namespace ult
} // namespace L0 } // namespace L0

View File

@@ -2275,5 +2275,31 @@ HWTEST2_F(CommandListStateBaseAddressGlobalStatelessTest,
EXPECT_EQ(scratchAllocation->getGpuAddress(), scratchSurfaceState->getSurfaceBaseAddress()); EXPECT_EQ(scratchAllocation->getGpuAddress(), scratchSurfaceState->getSurfaceBaseAddress());
} }
// Creates a command list in the global stateless fixture, appends one kernel launch, executes
// it and destroys it, then expects that nothing was written to the CSR command stream.
HWTEST2_F(CommandListStateBaseAddressGlobalStatelessTest,
          givenCommandListNotUsingPrivateSurfaceHeapWhenCommandListDestroyedThenCsrDoesNotDispatchStateCacheFlush,
          IsAtLeastSkl) {
    auto &csr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
    auto &csrStream = csr.commandStream;

    ze_result_t returnValue;
    L0::ult::CommandList *cmdListObject = whiteboxCast(CommandList::create(productFamily, device, engineGroupType, 0u, returnValue));
    // Everything below dereferences cmdListObject, so abort the test if creation failed.
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
    ASSERT_NE(nullptr, cmdListObject);

    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};
    // Do not silently ignore a failed append: an empty CSR stream would then prove nothing.
    returnValue = cmdListObject->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    returnValue = cmdListObject->close();
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    auto cmdListHandle = cmdListObject->toHandle();
    returnValue = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, true);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    returnValue = cmdListObject->destroy();
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    EXPECT_EQ(0u, csrStream.getUsed());
}
} // namespace ult } // namespace ult
} // namespace L0 } // namespace L0

View File

@@ -567,6 +567,10 @@ class CommandStreamReceiverMock : public CommandStreamReceiver {
return cs; return cs;
} }
// Mock override: submits nothing and always reports SubmissionStatus::SUCCESS.
SubmissionStatus sendRenderStateCacheFlush() override {
return SubmissionStatus::SUCCESS;
}
bool flushBatchedSubmissions() override { return true; } bool flushBatchedSubmissions() override { return true; }
CommandStreamReceiverType getType() const override { CommandStreamReceiverType getType() const override {

View File

@@ -93,6 +93,7 @@ class CommandStreamReceiver {
const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0; TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0;
virtual CompletionStamp flushBcsTask(LinearStream &commandStream, size_t commandStreamStart, const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) = 0; virtual CompletionStamp flushBcsTask(LinearStream &commandStream, size_t commandStreamStart, const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) = 0;
// Requests a render state cache flush from this CSR and returns the resulting submission status.
// Pure virtual: every concrete receiver (hardware or mock) has to provide it.
virtual SubmissionStatus sendRenderStateCacheFlush() = 0;
virtual bool flushBatchedSubmissions() = 0; virtual bool flushBatchedSubmissions() = 0;
MOCKABLE_VIRTUAL SubmissionStatus submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency); MOCKABLE_VIRTUAL SubmissionStatus submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency);

View File

@@ -100,10 +100,11 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
SubmissionStatus flushTagUpdate() override; SubmissionStatus flushTagUpdate() override;
SubmissionStatus flushMiFlushDW(); SubmissionStatus flushMiFlushDW();
SubmissionStatus flushPipeControl(); SubmissionStatus flushPipeControl(bool stateCacheFlush);
SubmissionStatus flushSmallTask(LinearStream &commandStreamTask, SubmissionStatus flushSmallTask(LinearStream &commandStreamTask,
size_t commandStreamStartTask); size_t commandStreamStartTask);
SubmissionStatus flushHandler(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency); SubmissionStatus flushHandler(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency);
// Hardware implementation of the state cache flush request; forwards to flushPipeControl(true).
SubmissionStatus sendRenderStateCacheFlush() override;
bool isUpdateTagFromWaitEnabled() override; bool isUpdateTagFromWaitEnabled() override;
void updateTagFromWait() override; void updateTagFromWait() override;

View File

@@ -1365,7 +1365,7 @@ inline SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushTagUpdate() {
if (EngineHelpers::isBcs(this->osContext->getEngineType())) { if (EngineHelpers::isBcs(this->osContext->getEngineType())) {
return this->flushMiFlushDW(); return this->flushMiFlushDW();
} else { } else {
return this->flushPipeControl(); return this->flushPipeControl(false);
} }
} }
return SubmissionStatus::DEVICE_UNINITIALIZED; return SubmissionStatus::DEVICE_UNINITIALIZED;
@@ -1393,7 +1393,7 @@ inline SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushMiFlushDW() {
} }
template <typename GfxFamily> template <typename GfxFamily>
SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushPipeControl() { SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushPipeControl(bool stateCacheFlush) {
auto lock = obtainUniqueOwnership(); auto lock = obtainUniqueOwnership();
PipeControlArgs args; PipeControlArgs args;
@@ -1401,6 +1401,12 @@ SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushPipeControl() {
args.notifyEnable = isUsedNotifyEnableForPostSync(); args.notifyEnable = isUsedNotifyEnableForPostSync();
args.workloadPartitionOffset = isMultiTileOperationEnabled(); args.workloadPartitionOffset = isMultiTileOperationEnabled();
if (stateCacheFlush) {
args.textureCacheInvalidationEnable = true;
args.renderTargetCacheFlushEnable = true;
args.stateCacheInvalidationEnable = true;
}
auto dispatchSize = MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), args.tlbInvalidation) + this->getCmdSizeForPrologue(); auto dispatchSize = MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), args.tlbInvalidation) + this->getCmdSizeForPrologue();
auto &commandStream = getCS(dispatchSize); auto &commandStream = getCS(dispatchSize);
@@ -1454,6 +1460,11 @@ SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushSmallTask(LinearStream
return submissionStatus; return submissionStatus;
} }
// Flushes a pipe control with the state cache flush option enabled. With that option
// flushPipeControl sets texture cache invalidation, render target cache flush and
// state cache invalidation in the pipe control arguments.
// Returns the status reported by flushPipeControl.
template <typename GfxFamily>
SubmissionStatus CommandStreamReceiverHw<GfxFamily>::sendRenderStateCacheFlush() {
return this->flushPipeControl(true);
}
template <typename GfxFamily> template <typename GfxFamily>
inline SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushHandler(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) { inline SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushHandler(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) {
auto status = flush(batchBuffer, allocationsForResidency); auto status = flush(batchBuffer, allocationsForResidency);

View File

@@ -75,6 +75,10 @@ bool isCcs(aub_stream::EngineType engineType) {
return engineType >= aub_stream::ENGINE_CCS && engineType <= aub_stream::ENGINE_CCS3; return engineType >= aub_stream::ENGINE_CCS && engineType <= aub_stream::ENGINE_CCS3;
} }
// An engine counts as compute-capable when it is any CCS engine, RCS or CCCS.
bool isComputeEngine(aub_stream::EngineType engineType) {
    if (isCcs(engineType)) {
        return true;
    }
    if (engineType == aub_stream::ENGINE_RCS) {
        return true;
    }
    return engineType == aub_stream::ENGINE_CCCS;
}
bool isBcs(aub_stream::EngineType engineType) { bool isBcs(aub_stream::EngineType engineType) {
return engineType == aub_stream::ENGINE_BCS || (engineType >= aub_stream::ENGINE_BCS1 && engineType <= aub_stream::ENGINE_BCS8); return engineType == aub_stream::ENGINE_BCS || (engineType >= aub_stream::ENGINE_BCS1 && engineType <= aub_stream::ENGINE_BCS8);
} }

View File

@@ -45,6 +45,7 @@ struct EngineDescriptor {
namespace EngineHelpers { namespace EngineHelpers {
bool isCcs(aub_stream::EngineType engineType); bool isCcs(aub_stream::EngineType engineType);
// Returns true for CCS engines, ENGINE_RCS and ENGINE_CCCS; false for every other engine type.
bool isComputeEngine(aub_stream::EngineType engineType);
bool isBcs(aub_stream::EngineType engineType); bool isBcs(aub_stream::EngineType engineType);
bool isBcsVirtualEngineEnabled(aub_stream::EngineType engineType); bool isBcsVirtualEngineEnabled(aub_stream::EngineType engineType);
aub_stream::EngineType getBcsEngineType(const RootDeviceEnvironment &rootDeviceEnvironment, const DeviceBitfield &deviceBitfield, SelectorCopyEngine &selectorCopyEngine, bool internalUsage); aub_stream::EngineType getBcsEngineType(const RootDeviceEnvironment &rootDeviceEnvironment, const DeviceBitfield &deviceBitfield, SelectorCopyEngine &selectorCopyEngine, bool internalUsage);

View File

@@ -93,6 +93,8 @@ struct UnitTestHelper {
static bool getSystolicFlagValueFromPipelineSelectCommand(const typename GfxFamily::PIPELINE_SELECT &pipelineSelectCmd); static bool getSystolicFlagValueFromPipelineSelectCommand(const typename GfxFamily::PIPELINE_SELECT &pipelineSelectCmd);
static size_t getAdditionalDshSize(uint32_t iddCount); static size_t getAdditionalDshSize(uint32_t iddCount);
static bool expectNullDsh(const DeviceInfo &deviceInfo); static bool expectNullDsh(const DeviceInfo &deviceInfo);
// Parses csrStream and returns true when it contains a PIPE_CONTROL with render target cache
// flush, state cache invalidation and texture cache invalidation all enabled.
static bool findStateCacheFlushPipeControl(LinearStream &csrStream);
}; };
} // namespace NEO } // namespace NEO

View File

@@ -5,6 +5,7 @@
* *
*/ */
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/device/device_info.h" #include "shared/source/device/device_info.h"
#include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/hw_info.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h"
@@ -113,4 +114,29 @@ bool UnitTestHelper<GfxFamily>::expectNullDsh(const DeviceInfo &deviceInfo) {
return true; return true;
} }
// Scans the given CSR stream for a PIPE_CONTROL that has render target cache flush,
// state cache invalidation and texture cache invalidation enabled at the same time.
// Returns true as soon as such a command is found, false when none of the parsed
// pipe controls matches.
template <typename GfxFamily>
bool UnitTestHelper<GfxFamily>::findStateCacheFlushPipeControl(LinearStream &csrStream) {
    using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;

    HardwareParse hwParserCsr;
    hwParserCsr.parsePipeControl = true;
    hwParserCsr.parseCommands<GfxFamily>(csrStream, 0);
    hwParserCsr.findHardwareCommands<GfxFamily>();

    for (auto &parsedCommand : hwParserCsr.pipeControlList) {
        auto candidate = reinterpret_cast<PIPE_CONTROL *>(parsedCommand);
        // All three flags are required; skip the command on the first one that is missing.
        if (!candidate->getRenderTargetCacheFlushEnable() ||
            !candidate->getStateCacheInvalidationEnable() ||
            !candidate->getTextureCacheInvalidationEnable()) {
            continue;
        }
        return true;
    }
    return false;
}
} // namespace NEO } // namespace NEO

View File

@@ -114,6 +114,10 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
CompletionStamp flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart, CompletionStamp flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) override; const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) override;
// Mock override: submits nothing and always reports SubmissionStatus::SUCCESS.
SubmissionStatus sendRenderStateCacheFlush() override {
return SubmissionStatus::SUCCESS;
}
bool flushBatchedSubmissions() override { bool flushBatchedSubmissions() override {
if (flushBatchedSubmissionsCallCounter) { if (flushBatchedSubmissionsCallCounter) {
(*flushBatchedSubmissionsCallCounter)++; (*flushBatchedSubmissionsCallCounter)++;

View File

@@ -2777,10 +2777,10 @@ HWTEST_F(CommandStreamReceiverHwTest, givenOutOfMemoryFailureOnFlushWhenFlushing
commandStreamReceiver.flushReturnValue = SubmissionStatus::OUT_OF_MEMORY; commandStreamReceiver.flushReturnValue = SubmissionStatus::OUT_OF_MEMORY;
EXPECT_EQ(SubmissionStatus::OUT_OF_MEMORY, commandStreamReceiver.flushPipeControl()); EXPECT_EQ(SubmissionStatus::OUT_OF_MEMORY, commandStreamReceiver.flushPipeControl(false));
commandStreamReceiver.flushReturnValue = SubmissionStatus::OUT_OF_HOST_MEMORY; commandStreamReceiver.flushReturnValue = SubmissionStatus::OUT_OF_HOST_MEMORY;
EXPECT_EQ(SubmissionStatus::OUT_OF_HOST_MEMORY, commandStreamReceiver.flushPipeControl()); EXPECT_EQ(SubmissionStatus::OUT_OF_HOST_MEMORY, commandStreamReceiver.flushPipeControl(false));
} }
HWTEST_F(CommandStreamReceiverHwTest, givenOutOfMemoryFailureOnFlushWhenFlushingTagUpdateThenErrorIsPropagated) { HWTEST_F(CommandStreamReceiverHwTest, givenOutOfMemoryFailureOnFlushWhenFlushingTagUpdateThenErrorIsPropagated) {
@@ -3097,3 +3097,19 @@ HWTEST2_F(CommandStreamReceiverHwTest,
auto scmCmd = hwParserCsr.getCommand<STATE_COMPUTE_MODE>(); auto scmCmd = hwParserCsr.getCommand<STATE_COMPUTE_MODE>();
EXPECT_NE(nullptr, scmCmd); EXPECT_NE(nullptr, scmCmd);
} }
// A pipe control flushed without the state cache flush option must not be reported
// as a state cache flush by the unit test helper.
HWTEST_F(CommandStreamReceiverHwTest, givenFlushPipeControlWhenFlushWithoutStateCacheFlushThenExpectNoStateCacheFlushFlagsSet) {
    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    ultCsr.flushPipeControl(false);

    const bool stateCacheFlushFound = UnitTestHelper<FamilyType>::findStateCacheFlushPipeControl(ultCsr.commandStream);
    EXPECT_FALSE(stateCacheFlushFound);
}
// After sendRenderStateCacheFlush the CSR command stream must contain a pipe control
// that the unit test helper recognizes as a state cache flush.
HWTEST_F(CommandStreamReceiverHwTest, givenFlushPipeControlWhenFlushWithStateCacheFlushThenExpectStateCacheFlushFlagsSet) {
    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    ultCsr.sendRenderStateCacheFlush();

    const bool stateCacheFlushFound = UnitTestHelper<FamilyType>::findStateCacheFlushPipeControl(ultCsr.commandStream);
    EXPECT_TRUE(stateCacheFlushFound);
}

View File

@@ -149,3 +149,34 @@ TEST(EngineNodeHelperTest, givenLinkCopyEnginesAndInternalUsageEnabledWhenGettin
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS3, EngineHelpers::getBcsEngineType(rootDeviceEnvironment, deviceBitfield, selectorCopyEngine, isInternalUsage)); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS3, EngineHelpers::getBcsEngineType(rootDeviceEnvironment, deviceBitfield, selectorCopyEngine, isInternalUsage));
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS3, EngineHelpers::getBcsEngineType(rootDeviceEnvironment, deviceBitfield, selectorCopyEngine, isInternalUsage)); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS3, EngineHelpers::getBcsEngineType(rootDeviceEnvironment, deviceBitfield, selectorCopyEngine, isInternalUsage));
} }
// Table-driven check of EngineHelpers::isComputeEngine: RCS, CCS..CCS3 and CCCS are expected
// to be compute engines, BCS..BCS8 are not.
TEST(EngineNodeHelperTest, givenAllEnginesWhenCheckingEngineIsComputeCapableThenReturnTrueOnlyForCompute) {
    struct EngineExpectation {
        aub_stream::EngineType type;
        bool expectedCompute;
    };

    const EngineExpectation expectations[] = {
        {aub_stream::ENGINE_RCS, true},
        {aub_stream::ENGINE_CCS, true},
        {aub_stream::ENGINE_CCS1, true},
        {aub_stream::ENGINE_CCS2, true},
        {aub_stream::ENGINE_CCS3, true},
        {aub_stream::ENGINE_CCCS, true},
        {aub_stream::ENGINE_BCS, false},
        {aub_stream::ENGINE_BCS1, false},
        {aub_stream::ENGINE_BCS2, false},
        {aub_stream::ENGINE_BCS3, false},
        {aub_stream::ENGINE_BCS4, false},
        {aub_stream::ENGINE_BCS5, false},
        {aub_stream::ENGINE_BCS6, false},
        {aub_stream::ENGINE_BCS7, false},
        {aub_stream::ENGINE_BCS8, false}};

    for (const auto &expectation : expectations) {
        EXPECT_EQ(expectation.expectedCompute, EngineHelpers::isComputeEngine(expectation.type));
    }
}