refactor: improve handling of duplicated in-order host storage

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz 2024-02-29 14:52:48 +00:00 committed by Compute-Runtime-Automation
parent e2c511bc00
commit ea2ad550a1
12 changed files with 43 additions and 17 deletions

View File

@ -351,6 +351,7 @@ struct CommandListCoreFamily : public CommandListImp {
uint64_t latestHostWaitedInOrderSyncValue = 0;
bool latestOperationRequiredNonWalkerInOrderCmdsChaining = false;
bool duplicatedInOrderCounterStorageEnabled = false;
};
template <PRODUCT_FAMILY gfxProductFamily>

View File

@ -236,6 +236,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
this->heaplessModeEnabled = compilerProductHelper.isHeaplessModeEnabled();
this->requiredStreamState.initSupport(rootDeviceEnvironment);
this->finalStreamState.initSupport(rootDeviceEnvironment);
this->duplicatedInOrderCounterStorageEnabled = gfxCoreHelper.duplicatedInOrderCounterStorageEnabled(rootDeviceEnvironment);
this->commandContainer.doubleSbaWaRef() = this->doubleSbaWa;
this->commandContainer.l1CachePolicyDataRef() = &this->l1CachePolicyData;
@ -3662,7 +3663,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(Event *event,
// Records a command for later in-order patching.
// An entry is appended to inOrderPatchCmds only when the EnableInOrderRegularCmdListPatching
// debug flag is non-zero and this command list is not of immediate type; otherwise nothing happens.
// NOTE(review): this span comes from a diff view; the two emplace_back lines below look like the
// pre-change (method call) and post-change (member read) variants of the same statement —
// confirm against the repository which one is live.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::addCmdForPatching(std::shared_ptr<NEO::InOrderExecInfo> *externalInOrderExecInfo, void *cmd1, void *cmd2, uint64_t counterValue, NEO::InOrderPatchCommandHelpers::PatchCmdType patchCmdType) {
if ((NEO::debugManager.flags.EnableInOrderRegularCmdListPatching.get() != 0) && !isImmediateType()) {
this->inOrderPatchCmds.emplace_back(externalInOrderExecInfo, cmd1, cmd2, counterValue, patchCmdType, inOrderAtomicSignallingEnabled(), duplicatedInOrderCounterStorageEnabled());
this->inOrderPatchCmds.emplace_back(externalInOrderExecInfo, cmd1, cmd2, counterValue, patchCmdType, inOrderAtomicSignallingEnabled(), this->duplicatedInOrderCounterStorageEnabled);
}
}

View File

@ -78,7 +78,7 @@ void programEventL3Flush(Event *event,
// Returns true when all of the following hold:
//  - duplicated in-order counter storage is not enabled,
//  - event is non-null,
//  - the event uses the context-end offset, OR is not counter-based, OR
//    compactL3FlushEvent(getDcFlushRequired(event->isSignalScope())) is true.
// A null event always yields false.
// NOTE(review): this span comes from a diff view; the two return statements look like the
// pre-change (method call) and post-change (cached member) variants of the same predicate.
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamily<gfxCoreFamily>::isInOrderNonWalkerSignalingRequired(const Event *event) const {
return (!duplicatedInOrderCounterStorageEnabled() && event && (event->isUsingContextEndOffset() || !event->isCounterBased() || compactL3FlushEvent(getDcFlushRequired(event->isSignalScope()))));
return (!this->duplicatedInOrderCounterStorageEnabled && event && (event->isUsingContextEndOffset() || !event->isCounterBased() || compactL3FlushEvent(getDcFlushRequired(event->isSignalScope()))));
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@ -256,16 +256,12 @@ void CommandListImp::setStreamPropertiesDefaultSettings(NEO::StreamProperties &s
streamProperties.stateBaseAddress.setPropertyGlobalAtomics(cmdListDefaultGlobalAtomics, true);
}
// Returns true only when the InOrderDuplicatedCounterStorageEnabled debug flag equals 1.
// NOTE(review): the hunk header line counts suggest this function is removed by the commit — confirm.
bool CommandListImp::duplicatedInOrderCounterStorageEnabled() const {
return (NEO::debugManager.flags.InOrderDuplicatedCounterStorageEnabled.get() == 1);
}
// Creates the in-order execution info for this command list.
// UNRECOVERABLE_IF triggers if inOrderExecInfo is already set, so this is expected to run once.
// A tag node taken from the device in-order counter allocator is passed to
// NEO::InOrderExecInfo::create together with the partition count, whether the list is a
// regular (non-immediate) one, and the atomic-signalling setting.
// NOTE(review): this span comes from a diff view; the two assignments look like the pre-change
// (six-argument) and post-change (five-argument) variants of the same create call.
void CommandListImp::enableInOrderExecution() {
UNRECOVERABLE_IF(inOrderExecInfo.get());
auto deviceCounterNode = this->device->getDeviceInOrderCounterAllocator()->getTag();
inOrderExecInfo = NEO::InOrderExecInfo::create(deviceCounterNode, *this->device->getNEODevice(), this->partitionCount, !isImmediateType(), inOrderAtomicSignallingEnabled(), duplicatedInOrderCounterStorageEnabled());
inOrderExecInfo = NEO::InOrderExecInfo::create(deviceCounterNode, *this->device->getNEODevice(), this->partitionCount, !isImmediateType(), inOrderAtomicSignallingEnabled());
}
void CommandListImp::storeReferenceTsToMappedEvents(bool isClearEnabled) {

View File

@ -51,7 +51,6 @@ struct CommandListImp : public CommandList {
~CommandListImp() override = default;
virtual bool inOrderAtomicSignallingEnabled() const = 0;
bool duplicatedInOrderCounterStorageEnabled() const;
static constexpr int32_t cmdListDefaultEngineInstancedDevice = NEO::StreamProperty::initValue;
static constexpr bool cmdListDefaultCoherency = false;

View File

@ -2158,11 +2158,12 @@ HWTEST2_F(InOrderCmdListTests, givenMultipleAllocationsForWriteWhenAskingForNonW
EXPECT_FALSE(immCmdList->isInOrderNonWalkerSignalingRequired(nullptr));
debugManager.flags.InOrderDuplicatedCounterStorageEnabled.set(1);
auto immCmdList2 = createImmCmdList<gfxCoreFamily>();
EXPECT_FALSE(immCmdList->isInOrderNonWalkerSignalingRequired(events[0].get()));
EXPECT_FALSE(immCmdList->isInOrderNonWalkerSignalingRequired(events[1].get()));
EXPECT_FALSE(immCmdList->isInOrderNonWalkerSignalingRequired(events[2].get()));
EXPECT_FALSE(immCmdList->isInOrderNonWalkerSignalingRequired(nullptr));
EXPECT_FALSE(immCmdList2->isInOrderNonWalkerSignalingRequired(events[0].get()));
EXPECT_FALSE(immCmdList2->isInOrderNonWalkerSignalingRequired(events[1].get()));
EXPECT_FALSE(immCmdList2->isInOrderNonWalkerSignalingRequired(events[2].get()));
EXPECT_FALSE(immCmdList2->isInOrderNonWalkerSignalingRequired(nullptr));
}
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenProgramPipeControlWithSignalAllocation, NonPostSyncWalkerMatcher) {

View File

@ -186,6 +186,7 @@ class GfxCoreHelper {
virtual uint32_t getKernelPrivateMemSize(const KernelDescriptor &kernelDescriptor) const = 0;
virtual bool singleTileExecImplicitScalingRequired(bool cooperativeKernel) const = 0;
virtual bool duplicatedInOrderCounterStorageEnabled(const RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
virtual ~GfxCoreHelper() = default;
@ -411,6 +412,7 @@ class GfxCoreHelperHw : public GfxCoreHelper {
uint32_t getKernelPrivateMemSize(const KernelDescriptor &kernelDescriptor) const override;
bool singleTileExecImplicitScalingRequired(bool cooperativeKernel) const override;
bool duplicatedInOrderCounterStorageEnabled(const RootDeviceEnvironment &rootDeviceEnvironment) const override;
~GfxCoreHelperHw() override = default;

View File

@ -756,4 +756,9 @@ bool GfxCoreHelperHw<GfxFamily>::singleTileExecImplicitScalingRequired(bool coop
return cooperativeKernel;
}
// Reports whether duplicated in-order counter storage is enabled.
// Returns true only when the InOrderDuplicatedCounterStorageEnabled debug flag equals 1.
// rootDeviceEnvironment is not read by this implementation.
template <typename GfxFamily>
bool GfxCoreHelperHw<GfxFamily>::duplicatedInOrderCounterStorageEnabled(const RootDeviceEnvironment &rootDeviceEnvironment) const {
return (debugManager.flags.InOrderDuplicatedCounterStorageEnabled.get() == 1);
}
} // namespace NEO

View File

@ -8,6 +8,7 @@
#include "shared/source/helpers/in_order_cmd_helpers.h"
#include "shared/source/device/device.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/utilities/tag_allocator.h"
@ -18,10 +19,12 @@
namespace NEO {
std::shared_ptr<InOrderExecInfo> InOrderExecInfo::create(TagNodeBase *deviceCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList, bool atomicDeviceSignalling, bool duplicatedHostStorage) {
std::shared_ptr<InOrderExecInfo> InOrderExecInfo::create(TagNodeBase *deviceCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList, bool atomicDeviceSignalling) {
NEO::GraphicsAllocation *hostCounterAllocation = nullptr;
if (duplicatedHostStorage) {
auto &gfxCoreHelper = device.getGfxCoreHelper();
if (gfxCoreHelper.duplicatedInOrderCounterStorageEnabled(device.getRootDeviceEnvironment())) {
NEO::AllocationProperties hostAllocationProperties{device.getRootDeviceIndex(), MemoryConstants::pageSize64k, NEO::AllocationType::bufferHostMemory, device.getDeviceBitfield()};
hostCounterAllocation = device.getMemoryManager()->allocateGraphicsMemoryWithProperties(hostAllocationProperties);

View File

@ -48,7 +48,7 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
InOrderExecInfo() = delete;
static std::shared_ptr<InOrderExecInfo> create(TagNodeBase *deviceCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList, bool atomicDeviceSignalling, bool duplicatedHostStorage);
static std::shared_ptr<InOrderExecInfo> create(TagNodeBase *deviceCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList, bool atomicDeviceSignalling);
static std::shared_ptr<InOrderExecInfo> createFromExternalAllocation(NEO::Device &device, uint64_t deviceAddress, uint64_t *hostAddress, uint64_t counterValue);
InOrderExecInfo(TagNodeBase *deviceCounterNode, NEO::GraphicsAllocation *hostCounterAllocation, NEO::MemoryManager &memoryManager, uint32_t partitionCount, uint32_t rootDeviceIndex,

View File

@ -88,7 +88,7 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecIn
EXPECT_NE(deviceNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation()->getGpuAddress(), deviceNode->getGpuAddress());
EXPECT_NE(deviceNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation()->getUnderlyingBuffer(), deviceNode->getCpuBase());
auto inOrderExecInfo = InOrderExecInfo::create(deviceNode, mockDevice, 2, false, false, false);
auto inOrderExecInfo = InOrderExecInfo::create(deviceNode, mockDevice, 2, false, false);
EXPECT_EQ(deviceNode->getCpuBase(), inOrderExecInfo->getBaseHostAddress());
EXPECT_EQ(deviceNode->getBaseGraphicsAllocation()->getGraphicsAllocation(0), inOrderExecInfo->getDeviceCounterAllocation());
@ -115,7 +115,10 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecIn
{
auto deviceNode = tagAllocator.getTag();
auto inOrderExecInfo = InOrderExecInfo::create(deviceNode, mockDevice, 2, false, false, true);
DebugManagerStateRestore restore;
debugManager.flags.InOrderDuplicatedCounterStorageEnabled.set(1);
auto inOrderExecInfo = InOrderExecInfo::create(deviceNode, mockDevice, 2, false, false);
EXPECT_NE(inOrderExecInfo->getDeviceCounterAllocation(), inOrderExecInfo->getHostCounterAllocation());
EXPECT_NE(deviceNode->getBaseGraphicsAllocation()->getGraphicsAllocation(0), inOrderExecInfo->getHostCounterAllocation());

View File

@ -1673,6 +1673,21 @@ HWTEST_F(GfxCoreHelperTest, whenAskingIf48bResourceNeededForCmdBufferThenReturnT
EXPECT_TRUE(getHelper<GfxCoreHelper>().is48ResourceNeededForCmdBuffer());
}
// Checks that GfxCoreHelper::duplicatedInOrderCounterStorageEnabled follows the
// InOrderDuplicatedCounterStorageEnabled debug flag: false before the flag is touched,
// true after setting it to 1, false again after setting it to 0.
// NOTE(review): "Dumplicated" in the test name looks like a typo for "Duplicated".
HWTEST_F(GfxCoreHelperTest, givenDebugVariableSetWhenAskingForDumplicatedInOrderHostStorageThenReturnCorrectValue) {
// presumably restores debug flag values when it goes out of scope — verify against DebugManagerStateRestore
DebugManagerStateRestore restore;
auto &helper = getHelper<GfxCoreHelper>();
auto &rootExecEnv = *pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0];
// Flag not yet modified by this test: expects the helper to report disabled.
EXPECT_FALSE(helper.duplicatedInOrderCounterStorageEnabled(rootExecEnv));
debugManager.flags.InOrderDuplicatedCounterStorageEnabled.set(1);
EXPECT_TRUE(helper.duplicatedInOrderCounterStorageEnabled(rootExecEnv));
debugManager.flags.InOrderDuplicatedCounterStorageEnabled.set(0);
EXPECT_FALSE(helper.duplicatedInOrderCounterStorageEnabled(rootExecEnv));
}
TEST_F(GfxCoreHelperTest, whenOnlyPerThreadPrivateMemorySizeIsDefinedThenItIsReturnedAsKernelPrivateMemorySize) {
KernelDescriptor kernelDescriptor{};
kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize = 0x100u;