Refactor partitioning of state base address

Related-To: NEO-6589

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2022-01-13 23:48:47 +00:00
committed by Compute-Runtime-Automation
parent 53482e6821
commit b78bb26cbf
7 changed files with 62 additions and 28 deletions

View File

@@ -101,6 +101,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
clearCommandsToPatch();
commandListSLMEnabled = false;
if (device->isImplicitScalingCapable() && !this->internalUsage) {
this->partitionCount = static_cast<uint32_t>(this->device->getNEODevice()->getDeviceBitfield().count());
} else {
this->partitionCount = 1;
}
if (!isCopyOnly()) {
if (!NEO::ApiSpecificConfig::getBindlessConfiguration()) {
programStateBaseAddress(commandContainer, false);
@@ -113,11 +119,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(alloc);
}
this->ownedPrivateAllocations.clear();
if (device->isImplicitScalingCapable() && !this->internalUsage) {
this->partitionCount = static_cast<uint32_t>(this->device->getNEODevice()->getDeviceBitfield().count());
} else {
this->partitionCount = 1;
}
return ZE_RESULT_SUCCESS;
}
@@ -130,6 +131,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
this->engineGroupType = engineGroupType;
this->flags = flags;
if (device->isImplicitScalingCapable() && !this->internalUsage) {
this->partitionCount = static_cast<uint32_t>(this->device->getNEODevice()->getDeviceBitfield().count());
}
if (this->cmdListType == CommandListType::TYPE_IMMEDIATE) {
this->isFlushTaskSubmissionEnabled = NEO::DebugManager.flags.EnableFlushTaskSubmission.get();
commandContainer.setFlushTaskUsedForImmediate(this->isFlushTaskSubmissionEnabled);
@@ -151,10 +156,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
}
}
if (device->isImplicitScalingCapable() && !this->internalUsage) {
this->partitionCount = static_cast<uint32_t>(this->device->getNEODevice()->getDeviceBitfield().count());
}
return returnType;
}
@@ -2321,7 +2322,7 @@ void CommandListCoreFamily<gfxCoreFamily>::programStateBaseAddress(NEO::CommandC
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
STATE_BASE_ADDRESS sba;
NEO::EncodeStateBaseAddress<GfxFamily>::encode(commandContainer, sba);
NEO::EncodeStateBaseAddress<GfxFamily>::encode(commandContainer, sba, this->partitionCount > 1);
if (NEO::Debugger::isDebugEnabled(this->internalUsage) && device->getL0Debugger()) {
NEO::Debugger::SbaAddresses sbaAddresses = {};
sbaAddresses.BindlessSurfaceStateBaseAddress = sba.getBindlessSurfaceStateBaseAddress();

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -11,6 +11,7 @@
#include "shared/test/common/test_macros/test.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
@@ -66,6 +67,40 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandListTests, whenCommandListIsCreatedThenPCAnd
EXPECT_EQ(ssh->getHeapGpuBase(), cmdSba->getSurfaceStateBaseAddress());
EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST), cmdSba->getStatelessDataPortAccessMemoryObjectControlState());
EXPECT_TRUE(cmdSba->getDisableSupportForMultiGpuPartialWritesForStatelessMessages());
EXPECT_TRUE(cmdSba->getDisableSupportForMultiGpuAtomicsForStatelessAccesses());
}
using MultiTileCommandListTests = Test<MultiTileCommandListFixture<false, false>>;
// Checks the STATE_BASE_ADDRESS command found in the command stream of a freshly
// created compute command list whose partitionCount is 2: the "disable multi-GPU
// partial writes" field must be cleared, while the "disable multi-GPU atomics"
// field must remain set.
// NOTE(review): the partition count of 2 presumably comes from the
// MultiTileCommandListFixture setup — confirm against the fixture.
HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListTests, givenPartitionedCommandListWhenCommandListIsCreatedThenStateBaseAddressCmdWithMultiPartialAndAtomicsCorrectlyProgrammed) {
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue));
// The command list is expected to be partitioned right after creation.
EXPECT_EQ(2u, commandList->partitionCount);
auto &commandContainer = commandList->commandContainer;
ASSERT_NE(nullptr, commandContainer.getCommandStream());
// Closing the list must append something to the command stream.
auto usedSpaceBefore = commandContainer.getCommandStream()->getUsed();
auto result = commandList->close();
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed();
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
// Parse the stream from its CPU base (offset 0) up to the used size and locate the
// first STATE_BASE_ADDRESS command.
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter));
auto itorSba = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorSba);
auto cmdSba = genCmdCast<STATE_BASE_ADDRESS *>(*itorSba);
// Partial writes are not disabled for the partitioned list; atomics stay disabled.
EXPECT_FALSE(cmdSba->getDisableSupportForMultiGpuPartialWritesForStatelessMessages());
EXPECT_TRUE(cmdSba->getDisableSupportForMultiGpuAtomicsForStatelessAccesses());
}
using CommandListTestsReserveSize = Test<DeviceFixture>;

View File

@@ -226,8 +226,8 @@ struct EncodeMediaInterfaceDescriptorLoad {
// Static helpers for working with the STATE_BASE_ADDRESS command of a given GfxFamily.
// NOTE(review): this hunk is shown without +/- markers; the first two encode()
// declarations appear to be the removed versions and the following two (taking an
// extra multiOsContextCapable flag) their replacements — confirm against the real header.
template <typename GfxFamily>
struct EncodeStateBaseAddress {
using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;
static void encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd);
static void encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, uint32_t statelessMocsIndex, bool useGlobalAtomics);
// Overload without an explicit MOCS index.
static void encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, bool multiOsContextCapable);
// Overload taking the stateless MOCS index, the global-atomics flag and the multi-OS-context flag explicitly.
static void encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, uint32_t statelessMocsIndex, bool useGlobalAtomics, bool multiOsContextCapable);
static void setIohAddressForDebugger(NEO::Debugger::SbaAddresses &sbaAddress, const STATE_BASE_ADDRESS &sbaCmd);
static size_t getRequiredSizeForStateBaseAddress(Device &device, CommandContainer &container);
};

View File

@@ -177,7 +177,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
auto gmmHelper = container.getDevice()->getGmmHelper();
uint32_t statelessMocsIndex =
args.requiresUncachedMocs ? (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1) : (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1);
EncodeStateBaseAddress<Family>::encode(container, sba, statelessMocsIndex, false);
EncodeStateBaseAddress<Family>::encode(container, sba, statelessMocsIndex, false, false);
container.setDirtyStateForAllHeaps(false);
args.requiresUncachedMocs = false;
}
@@ -378,14 +378,14 @@ void EncodeStateBaseAddress<Family>::setIohAddressForDebugger(NEO::Debugger::Sba
}
template <typename Family>
void EncodeStateBaseAddress<Family>::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd) {
void EncodeStateBaseAddress<Family>::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, bool multiOsContextCapable) {
auto gmmHelper = container.getDevice()->getRootDeviceEnvironment().getGmmHelper();
uint32_t statelessMocsIndex = (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1);
EncodeStateBaseAddress<Family>::encode(container, sbaCmd, statelessMocsIndex, false);
EncodeStateBaseAddress<Family>::encode(container, sbaCmd, statelessMocsIndex, false, multiOsContextCapable);
}
template <typename Family>
void EncodeStateBaseAddress<Family>::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, uint32_t statelessMocsIndex, bool useGlobalAtomics) {
void EncodeStateBaseAddress<Family>::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, uint32_t statelessMocsIndex, bool useGlobalAtomics, bool multiOsContextCapable) {
if (container.isAnyHeapDirty()) {
EncodeWA<Family>::encodeAdditionalPipelineSelect(*container.getDevice(), *container.getCommandStream(), true);
}

View File

@@ -213,7 +213,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
auto gmmHelper = container.getDevice()->getGmmHelper();
uint32_t statelessMocsIndex =
args.requiresUncachedMocs ? (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1) : (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1);
EncodeStateBaseAddress<Family>::encode(container, sbaCmd, statelessMocsIndex, args.useGlobalAtomics);
EncodeStateBaseAddress<Family>::encode(container, sbaCmd, statelessMocsIndex, args.useGlobalAtomics, args.partitionCount > 1);
container.setDirtyStateForAllHeaps(false);
args.requiresUncachedMocs = false;
}
@@ -473,17 +473,15 @@ void EncodeStateBaseAddress<Family>::setIohAddressForDebugger(NEO::Debugger::Sba
}
template <typename Family>
void EncodeStateBaseAddress<Family>::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd) {
void EncodeStateBaseAddress<Family>::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, bool multiOsContextCapable) {
auto gmmHelper = container.getDevice()->getRootDeviceEnvironment().getGmmHelper();
uint32_t statelessMocsIndex = (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1);
EncodeStateBaseAddress<Family>::encode(container, sbaCmd, statelessMocsIndex, false);
EncodeStateBaseAddress<Family>::encode(container, sbaCmd, statelessMocsIndex, false, multiOsContextCapable);
}
template <typename Family>
void EncodeStateBaseAddress<Family>::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, uint32_t statelessMocsIndex, bool useGlobalAtomics) {
void EncodeStateBaseAddress<Family>::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, uint32_t statelessMocsIndex, bool useGlobalAtomics, bool multiOsContextCapable) {
auto gmmHelper = container.getDevice()->getRootDeviceEnvironment().getGmmHelper();
bool multiOsContextCapable =
ImplicitScalingHelper::isImplicitScalingEnabled(container.getDevice()->getDeviceBitfield(), true);
StateBaseAddressHelper<Family>::programStateBaseAddress(
&sbaCmd,

View File

@@ -80,7 +80,7 @@ GEN12LPTEST_F(CommandEncoderTest, givenVariousEngineTypesWhenEncodeSBAThenAdditi
{
STATE_BASE_ADDRESS sba;
EncodeStateBaseAddress<FamilyType>::encode(cmdContainer, sba);
EncodeStateBaseAddress<FamilyType>::encode(cmdContainer, sba, false);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed());
@@ -94,7 +94,7 @@ GEN12LPTEST_F(CommandEncoderTest, givenVariousEngineTypesWhenEncodeSBAThenAdditi
static_cast<MockOsContext *>(pDevice->getDefaultEngine().osContext)->engineType = aub_stream::ENGINE_CCS;
STATE_BASE_ADDRESS sba;
EncodeStateBaseAddress<FamilyType>::encode(cmdContainer, sba);
EncodeStateBaseAddress<FamilyType>::encode(cmdContainer, sba, false);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed());

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -288,7 +288,7 @@ HWTEST2_F(CommandEncodeStatesTest, givenCommandContainerWithDirtyHeapsWhenSetSta
cmdContainer->setHeapDirty(NEO::HeapType::SURFACE_STATE);
STATE_BASE_ADDRESS sba;
EncodeStateBaseAddress<FamilyType>::encode(*cmdContainer.get(), sba);
EncodeStateBaseAddress<FamilyType>::encode(*cmdContainer.get(), sba, false);
auto dsh = cmdContainer->getIndirectHeap(NEO::HeapType::DYNAMIC_STATE);
auto ssh = cmdContainer->getIndirectHeap(NEO::HeapType::SURFACE_STATE);
@@ -318,7 +318,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWhenSetStateBaseAddressCa
cmdContainer->dirtyHeaps = 0;
STATE_BASE_ADDRESS sba;
EncodeStateBaseAddress<FamilyType>::encode(*cmdContainer.get(), sba);
EncodeStateBaseAddress<FamilyType>::encode(*cmdContainer.get(), sba, false);
auto dsh = cmdContainer->getIndirectHeap(NEO::HeapType::DYNAMIC_STATE);
auto ssh = cmdContainer->getIndirectHeap(NEO::HeapType::SURFACE_STATE);