mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 14:02:58 +08:00
Fix issues in state base address properties tracking
- add correct stateless mocs state update in immediate command lists - disallow stateless mocs dirty sba command dispatch when sba tracking enabled - check support first, only then do the dirty state check in csr Related-To: NEO-5055 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
4c32529b5b
commit
2d6e5c2588
@@ -25,6 +25,10 @@ class LogicalStateHelper;
|
||||
|
||||
namespace L0 {
|
||||
enum class Builtin : uint32_t;
|
||||
|
||||
struct Event;
|
||||
struct EventPool;
|
||||
|
||||
#pragma pack(1)
|
||||
struct EventData {
|
||||
uint64_t address;
|
||||
@@ -60,9 +64,6 @@ struct CmdListEventOperation {
|
||||
bool workPartitionOperation = false;
|
||||
};
|
||||
|
||||
struct EventPool;
|
||||
struct Event;
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
struct CommandListCoreFamily : CommandListImp {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
@@ -309,6 +310,13 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
void dispatchPostSyncCommands(const CmdListEventOperation &eventOperations, uint64_t gpuAddress, uint32_t value, bool useLastPipeControl, bool signalScope);
|
||||
void dispatchEventRemainingPacketsPostSyncOperation(Event *event);
|
||||
void dispatchEventPostSyncOperation(Event *event, uint32_t value, bool omitFirstOperation, bool useMax, bool useLastPipeControl);
|
||||
// Folds the kernel's uncached-MOCS requirement into the command list's
// accumulated containsStatelessUncachedResource flag and returns whether the
// dispatch should request uncached MOCS.
// The accumulated flag is updated unconditionally; the return value is forced
// to false whenever state base address tracking is enabled.
bool isKernelUncachedMocsRequired(bool kernelState) {
    if (kernelState) {
        this->containsStatelessUncachedResource = true;
    }
    return !this->stateBaseAddressTracking && this->containsStatelessUncachedResource;
}
|
||||
|
||||
static constexpr int32_t cmdListDefaultEngineInstancedDevice = NEO::StreamProperty::initValue;
|
||||
static constexpr bool cmdListDefaultCoherency = false;
|
||||
|
||||
@@ -131,7 +131,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
}
|
||||
|
||||
KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
|
||||
this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs();
|
||||
bool uncachedMocsKernel = isKernelUncachedMocsRequired(kernelImp->getKernelRequiresUncachedMocs());
|
||||
this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs();
|
||||
|
||||
NEO::Device *neoDevice = device->getNEODevice();
|
||||
@@ -166,7 +166,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
launchParams.isIndirect, // isIndirect
|
||||
launchParams.isPredicate, // isPredicate
|
||||
false, // isTimestampEvent
|
||||
this->containsStatelessUncachedResource, // requiresUncachedMocs
|
||||
uncachedMocsKernel, // requiresUncachedMocs
|
||||
false, // useGlobalAtomics
|
||||
internalUsage, // isInternal
|
||||
launchParams.isCooperative, // isCooperative
|
||||
@@ -178,7 +178,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
};
|
||||
|
||||
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs, getLogicalStateHelper());
|
||||
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;
|
||||
if (!this->isFlushTaskSubmissionEnabled) {
|
||||
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;
|
||||
}
|
||||
|
||||
if (neoDevice->getDebugger() && !this->immediateCmdListHeapSharing) {
|
||||
auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE);
|
||||
|
||||
@@ -70,6 +70,12 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::updateDispatchFlagsWithRequi
|
||||
|
||||
const auto &requiredPipelineSelect = this->requiredStreamState.pipelineSelect;
|
||||
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = requiredPipelineSelect.systolicMode.value == 1;
|
||||
if (this->containsStatelessUncachedResource) {
|
||||
dispatchFlags.l3CacheSettings = NEO::L3CachingSettings::l3CacheOff;
|
||||
this->containsStatelessUncachedResource = false;
|
||||
} else {
|
||||
dispatchFlags.l3CacheSettings = NEO::L3CachingSettings::l3CacheOn;
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -103,7 +109,7 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegular
|
||||
this->isSyncModeQueue, // dcFlush
|
||||
this->getCommandListSLMEnable(), // useSLM
|
||||
this->isSyncModeQueue, // guardCommandBufferWithPipeControl
|
||||
false, // GSBA32BitRequired
|
||||
false, // gsba32BitRequired
|
||||
false, // requiresCoherency
|
||||
false, // lowPriority
|
||||
true, // implicitFlush
|
||||
|
||||
@@ -262,7 +262,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
}
|
||||
|
||||
KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
|
||||
this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs();
|
||||
bool uncachedMocsKernel = isKernelUncachedMocsRequired(kernelImp->getKernelRequiresUncachedMocs());
|
||||
this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs();
|
||||
|
||||
updateStreamProperties(*kernel, launchParams.isCooperative, threadGroupDimensions, launchParams.isIndirect);
|
||||
@@ -293,7 +293,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
launchParams.isIndirect, // isIndirect
|
||||
launchParams.isPredicate, // isPredicate
|
||||
isTimestampEvent, // isTimestampEvent
|
||||
this->containsStatelessUncachedResource, // requiresUncachedMocs
|
||||
uncachedMocsKernel, // requiresUncachedMocs
|
||||
kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, // useGlobalAtomics
|
||||
internalUsage, // isInternal
|
||||
launchParams.isCooperative, // isCooperative
|
||||
@@ -304,7 +304,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
this->dcFlushSupport // dcFlushEnable
|
||||
};
|
||||
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs, getLogicalStateHelper());
|
||||
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;
|
||||
if (!this->isFlushTaskSubmissionEnabled) {
|
||||
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;
|
||||
}
|
||||
|
||||
if (compactEvent) {
|
||||
appendEventForProfilingAllWalkers(compactEvent, false, true);
|
||||
|
||||
@@ -188,7 +188,8 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||
NEO::StreamProperties &csrState,
|
||||
const NEO::StreamProperties &cmdListRequired,
|
||||
const NEO::StreamProperties &cmdListFinal);
|
||||
inline void programRequiredStateBaseAddressForCommandList(CommandList *commandList,
|
||||
inline void programRequiredStateBaseAddressForCommandList(CommandListExecutionContext &ctx,
|
||||
CommandList *commandList,
|
||||
NEO::LinearStream &commandStream,
|
||||
NEO::StreamProperties &csrState,
|
||||
const NEO::StreamProperties &cmdListRequired,
|
||||
|
||||
@@ -170,7 +170,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
|
||||
this->programOneCmdListPipelineSelect(commandList, child, csrStateProperties, requiredStreamState, finalStreamState);
|
||||
this->programOneCmdListFrontEndIfDirty(ctx, child, csrStateProperties, requiredStreamState, finalStreamState);
|
||||
this->programRequiredStateComputeModeForCommandList(commandList, child, csrStateProperties, requiredStreamState, finalStreamState);
|
||||
this->programRequiredStateBaseAddressForCommandList(commandList, child, csrStateProperties, requiredStreamState, finalStreamState);
|
||||
this->programRequiredStateBaseAddressForCommandList(ctx, commandList, child, csrStateProperties, requiredStreamState, finalStreamState);
|
||||
|
||||
this->patchCommands(*commandList, this->csr->getScratchSpaceController()->getScratchPatchAddress());
|
||||
this->programOneCmdListBatchBufferStart(commandList, child, ctx);
|
||||
@@ -1210,7 +1210,8 @@ void CommandQueueHw<gfxCoreFamily>::programRequiredStateComputeModeForCommandLis
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::programRequiredStateBaseAddressForCommandList(CommandList *commandList,
|
||||
void CommandQueueHw<gfxCoreFamily>::programRequiredStateBaseAddressForCommandList(CommandListExecutionContext &ctx,
|
||||
CommandList *commandList,
|
||||
NEO::LinearStream &commandStream,
|
||||
NEO::StreamProperties &csrState,
|
||||
const NEO::StreamProperties &cmdListRequired,
|
||||
|
||||
@@ -84,6 +84,7 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
|
||||
using KernelImp::kernelHasIndirectAccess;
|
||||
using KernelImp::kernelImmData;
|
||||
using KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime;
|
||||
using KernelImp::kernelRequiresUncachedMocsCount;
|
||||
using KernelImp::printfBuffer;
|
||||
using KernelImp::privateMemoryGraphicsAllocation;
|
||||
using KernelImp::requiredWorkgroupOrder;
|
||||
|
||||
@@ -942,7 +942,7 @@ HWTEST2_F(CommandListStateBaseAddressTest,
|
||||
auto ioBaseAddress = container.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapGpuBase();
|
||||
auto ioSize = container.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapSizeInPages();
|
||||
|
||||
auto statlessMocs = device->getMOCS(true, false) >> 1;
|
||||
auto statlessMocs = getMocs(true);
|
||||
|
||||
auto &requiredState = commandList->requiredStreamState.stateBaseAddress;
|
||||
auto &finalState = commandList->finalStreamState.stateBaseAddress;
|
||||
@@ -1132,7 +1132,7 @@ HWTEST2_F(CommandListStateBaseAddressTest,
|
||||
auto ioBaseAddress = container.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapGpuBase();
|
||||
auto ioSize = container.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT)->getHeapSizeInPages();
|
||||
|
||||
auto statlessMocs = device->getMOCS(true, false) >> 1;
|
||||
auto statlessMocs = getMocs(true);
|
||||
|
||||
EXPECT_EQ(static_cast<int32_t>(statlessMocs), csrState.statelessMocs.value);
|
||||
|
||||
@@ -1451,5 +1451,176 @@ HWTEST2_F(CommandListStateBaseAddressTest,
|
||||
EXPECT_EQ(csrState.statelessMocs.value, finalState.statelessMocs.value);
|
||||
}
|
||||
|
||||
// Scenario: a regular command list appends an uncached-MOCS kernel and is
// executed, then an immediate command list appends the same uncached kernel.
// Expectation: no STATE_BASE_ADDRESS command is emitted by the regular append,
// and the CSR stateless MOCS property holds the uncached value after both steps.
HWTEST2_F(CommandListStateBaseAddressTest,
          givenStateBaseAddressTrackingWhenRegularCmdListAppendUncachedKernelFirstAndExecuteAndImmediateCmdListAppendUncachedKerneThenMocsStateIsUpdatedInCsr,
          IsAtLeastSkl) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    // Both command lists in the fixture must run with SBA tracking enabled.
    EXPECT_TRUE(commandList->stateBaseAddressTracking);
    EXPECT_TRUE(commandListImmediate->stateBaseAddressTracking);

    // Mark the kernel as requiring uncached MOCS.
    ++kernel->kernelRequiresUncachedMocsCount;

    auto &regularStream = *commandList->commandContainer.getCommandStream();

    ze_group_count_t dispatchDims{1, 1, 1};
    CmdListKernelLaunchParams kernelLaunchParams = {};

    // Capture the stream window produced by the regular append.
    const size_t offsetBeforeAppend = regularStream.getUsed();
    auto regularAppendStatus = commandList->appendLaunchKernel(kernel->toHandle(), &dispatchDims, nullptr, 0, nullptr, kernelLaunchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, regularAppendStatus);
    const size_t offsetAfterAppend = regularStream.getUsed();

    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands,
        ptrOffset(regularStream.getCpuBase(), offsetBeforeAppend),
        offsetAfterAppend - offsetBeforeAppend));

    // The append itself must not program any STATE_BASE_ADDRESS command.
    auto sbaCommands = findAll<STATE_BASE_ADDRESS *>(parsedCommands.begin(), parsedCommands.end());
    EXPECT_EQ(0u, sbaCommands.size());

    const uint32_t uncachedMocsRaw = getMocs(false);
    const auto expectedUncachedMocs = static_cast<int32_t>(uncachedMocsRaw);

    auto &sbaRequired = commandList->requiredStreamState.stateBaseAddress;
    auto &sbaFinal = commandList->finalStreamState.stateBaseAddress;
    auto &sbaCsr = commandQueue->getCsr()->getStreamProperties().stateBaseAddress;

    EXPECT_EQ(expectedUncachedMocs, sbaRequired.statelessMocs.value);
    EXPECT_EQ(expectedUncachedMocs, sbaFinal.statelessMocs.value);

    auto closeStatus = commandList->close();
    EXPECT_EQ(ZE_RESULT_SUCCESS, closeStatus);

    ze_command_list_handle_t regularHandle = commandList->toHandle();
    auto executeStatus = commandQueue->executeCommandLists(1, &regularHandle, nullptr, true);
    EXPECT_EQ(ZE_RESULT_SUCCESS, executeStatus);

    // Executing the regular list propagates the uncached MOCS into the CSR state.
    EXPECT_EQ(expectedUncachedMocs, sbaCsr.statelessMocs.value);

    auto immediateAppendStatus = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &dispatchDims, nullptr, 0, nullptr, kernelLaunchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, immediateAppendStatus);

    // The immediate append of the same uncached kernel leaves the CSR value uncached.
    EXPECT_EQ(expectedUncachedMocs, sbaCsr.statelessMocs.value);
}
|
||||
|
||||
// Scenario: a regular command list appends a cached-MOCS kernel and is
// executed, then an immediate command list appends an uncached-MOCS kernel.
// Expectation: the CSR stateless MOCS property is cached after the regular
// execution and switches to uncached after the immediate append.
HWTEST2_F(CommandListStateBaseAddressTest,
          givenStateBaseAddressTrackingWhenRegularCmdListAppendCachedKernelFirstAndExecuteAndImmediateCmdListAppendUncachedKerneThenMocsStateIsUpdatedInCsr,
          IsAtLeastSkl) {
    // Both command lists in the fixture must run with SBA tracking enabled.
    EXPECT_TRUE(commandList->stateBaseAddressTracking);
    EXPECT_TRUE(commandListImmediate->stateBaseAddressTracking);

    // Start with a kernel that does not require uncached MOCS.
    kernel->kernelRequiresUncachedMocsCount = 0;

    ze_group_count_t dispatchDims{1, 1, 1};
    CmdListKernelLaunchParams kernelLaunchParams = {};
    auto regularAppendStatus = commandList->appendLaunchKernel(kernel->toHandle(), &dispatchDims, nullptr, 0, nullptr, kernelLaunchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, regularAppendStatus);

    const uint32_t uncachedMocsRaw = getMocs(false);
    const uint32_t cachedMocsRaw = getMocs(true);
    const auto expectedUncachedMocs = static_cast<int32_t>(uncachedMocsRaw);
    const auto expectedCachedMocs = static_cast<int32_t>(cachedMocsRaw);

    auto &sbaRequired = commandList->requiredStreamState.stateBaseAddress;
    auto &sbaFinal = commandList->finalStreamState.stateBaseAddress;
    auto &sbaCsr = commandQueue->getCsr()->getStreamProperties().stateBaseAddress;

    EXPECT_EQ(expectedCachedMocs, sbaRequired.statelessMocs.value);
    EXPECT_EQ(expectedCachedMocs, sbaFinal.statelessMocs.value);

    auto closeStatus = commandList->close();
    EXPECT_EQ(ZE_RESULT_SUCCESS, closeStatus);

    ze_command_list_handle_t regularHandle = commandList->toHandle();
    auto executeStatus = commandQueue->executeCommandLists(1, &regularHandle, nullptr, true);
    EXPECT_EQ(ZE_RESULT_SUCCESS, executeStatus);

    // Executing the regular list leaves the CSR with the cached MOCS value.
    EXPECT_EQ(expectedCachedMocs, sbaCsr.statelessMocs.value);

    // Switch the kernel to uncached and dispatch it through the immediate list.
    kernel->kernelRequiresUncachedMocsCount = 1;
    auto immediateAppendStatus = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &dispatchDims, nullptr, 0, nullptr, kernelLaunchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, immediateAppendStatus);

    EXPECT_EQ(expectedUncachedMocs, sbaCsr.statelessMocs.value);
}
|
||||
|
||||
// Scenario: an immediate command list appends an uncached-MOCS kernel first,
// then a regular command list appends a cached-MOCS kernel and is executed.
// Expectation: the CSR stateless MOCS property is uncached after the immediate
// append and returns to cached after the regular list executes.
HWTEST2_F(CommandListStateBaseAddressTest,
          givenStateBaseAddressTrackingWhenImmediateCmdListAppendUncachedKerneAndRegularCmdListAppendCachedKernelAndExecuteThenMocsStateIsUpdatedInCsr,
          IsAtLeastSkl) {
    // Both command lists in the fixture must run with SBA tracking enabled.
    EXPECT_TRUE(commandList->stateBaseAddressTracking);
    EXPECT_TRUE(commandListImmediate->stateBaseAddressTracking);

    const uint32_t uncachedMocsRaw = getMocs(false);
    const uint32_t cachedMocsRaw = getMocs(true);
    const auto expectedUncachedMocs = static_cast<int32_t>(uncachedMocsRaw);
    const auto expectedCachedMocs = static_cast<int32_t>(cachedMocsRaw);

    auto &sbaRequired = commandList->requiredStreamState.stateBaseAddress;
    auto &sbaFinal = commandList->finalStreamState.stateBaseAddress;
    auto &sbaCsr = commandQueue->getCsr()->getStreamProperties().stateBaseAddress;

    // Immediate list dispatches a kernel that requires uncached MOCS.
    kernel->kernelRequiresUncachedMocsCount = 1;

    ze_group_count_t dispatchDims{1, 1, 1};
    CmdListKernelLaunchParams kernelLaunchParams = {};
    auto immediateAppendStatus = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &dispatchDims, nullptr, 0, nullptr, kernelLaunchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, immediateAppendStatus);

    EXPECT_EQ(expectedUncachedMocs, sbaCsr.statelessMocs.value);

    // Regular list records the same kernel with the uncached requirement cleared.
    kernel->kernelRequiresUncachedMocsCount = 0;

    auto regularAppendStatus = commandList->appendLaunchKernel(kernel->toHandle(), &dispatchDims, nullptr, 0, nullptr, kernelLaunchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, regularAppendStatus);

    EXPECT_EQ(expectedCachedMocs, sbaRequired.statelessMocs.value);
    EXPECT_EQ(expectedCachedMocs, sbaFinal.statelessMocs.value);

    auto closeStatus = commandList->close();
    EXPECT_EQ(ZE_RESULT_SUCCESS, closeStatus);

    ze_command_list_handle_t regularHandle = commandList->toHandle();
    auto executeStatus = commandQueue->executeCommandLists(1, &regularHandle, nullptr, true);
    EXPECT_EQ(ZE_RESULT_SUCCESS, executeStatus);

    // Executing the regular list moves the CSR state back to cached MOCS.
    EXPECT_EQ(expectedCachedMocs, sbaCsr.statelessMocs.value);
}
|
||||
|
||||
// Scenario: an immediate command list appends a cached-MOCS kernel first,
// then a regular command list appends an uncached-MOCS kernel and is executed.
// Expectation: the CSR stateless MOCS property is cached after the immediate
// append and becomes uncached after the regular list executes.
HWTEST2_F(CommandListStateBaseAddressTest,
          givenStateBaseAddressTrackingWhenImmediateCmdListAppendCachedKerneAndRegularCmdListAppendUncachedKernelAndExecuteThenMocsStateIsUpdatedInCsr,
          IsAtLeastSkl) {
    // Both command lists in the fixture must run with SBA tracking enabled.
    EXPECT_TRUE(commandList->stateBaseAddressTracking);
    EXPECT_TRUE(commandListImmediate->stateBaseAddressTracking);

    const uint32_t uncachedMocsRaw = getMocs(false);
    const uint32_t cachedMocsRaw = getMocs(true);
    const auto expectedUncachedMocs = static_cast<int32_t>(uncachedMocsRaw);
    const auto expectedCachedMocs = static_cast<int32_t>(cachedMocsRaw);

    auto &sbaRequired = commandList->requiredStreamState.stateBaseAddress;
    auto &sbaFinal = commandList->finalStreamState.stateBaseAddress;
    auto &sbaCsr = commandQueue->getCsr()->getStreamProperties().stateBaseAddress;

    // Immediate list dispatches a kernel without the uncached requirement.
    kernel->kernelRequiresUncachedMocsCount = 0;

    ze_group_count_t dispatchDims{1, 1, 1};
    CmdListKernelLaunchParams kernelLaunchParams = {};
    auto immediateAppendStatus = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &dispatchDims, nullptr, 0, nullptr, kernelLaunchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, immediateAppendStatus);

    EXPECT_EQ(expectedCachedMocs, sbaCsr.statelessMocs.value);

    // Regular list records the same kernel, now requiring uncached MOCS.
    kernel->kernelRequiresUncachedMocsCount = 1;

    auto regularAppendStatus = commandList->appendLaunchKernel(kernel->toHandle(), &dispatchDims, nullptr, 0, nullptr, kernelLaunchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, regularAppendStatus);

    EXPECT_EQ(expectedUncachedMocs, sbaRequired.statelessMocs.value);
    EXPECT_EQ(expectedUncachedMocs, sbaFinal.statelessMocs.value);

    auto closeStatus = commandList->close();
    EXPECT_EQ(ZE_RESULT_SUCCESS, closeStatus);

    ze_command_list_handle_t regularHandle = commandList->toHandle();
    auto executeStatus = commandQueue->executeCommandLists(1, &regularHandle, nullptr, true);
    EXPECT_EQ(ZE_RESULT_SUCCESS, executeStatus);

    // Executing the regular list switches the CSR state to uncached MOCS.
    EXPECT_EQ(expectedUncachedMocs, sbaCsr.statelessMocs.value);
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
Reference in New Issue
Block a user