Refactor state base address programming 3/n

This is a small optimization that replaces a virtual call and a retrieved
struct with a cached value.

Related-To: NEO-5055

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2023-02-22 21:30:40 +00:00
committed by Compute-Runtime-Automation
parent 075c96267d
commit 3cb064fe95
25 changed files with 94 additions and 64 deletions

View File

@@ -352,6 +352,7 @@ struct CommandList : _ze_command_list_handle_t {
bool stateComputeModeTracking = false;
bool signalAllEventPackets = false;
bool stateBaseAddressTracking = false;
bool doubleSbaWa = false;
};
using CommandListAllocatorFn = CommandList *(*)(uint32_t);

View File

@@ -151,6 +151,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
this->compactL3FlushEventPacket = L0GfxCoreHelper::useCompactL3FlushEventPacket(hwInfo);
this->signalAllEventPackets = L0GfxCoreHelper::useSignalAllEventPackets(hwInfo);
this->dynamicHeapRequired = NEO::EncodeDispatchKernel<GfxFamily>::isDshNeeded(device->getDeviceInfo());
auto &productHelper = rootDeviceEnvironment.getHelper<NEO::ProductHelper>();
this->doubleSbaWa = productHelper.isAdditionalStateBaseAddressWARequired(hwInfo);
commandContainer.doubleSbaWa = this->doubleSbaWa;
if (device->isImplicitScalingCapable() && !this->internalUsage && !isCopyOnly()) {
this->partitionCount = static_cast<uint32_t>(this->device->getNEODevice()->getDeviceBitfield().count());
@@ -2628,7 +2631,8 @@ void CommandListCoreFamily<gfxCoreFamily>::programStateBaseAddress(NEO::CommandC
statelessMocsIndex, // statelessMocsIndex
false, // useGlobalAtomics
this->partitionCount > 1, // multiOsContextCapable
isRcs}; // isRcs
isRcs, // isRcs
this->doubleSbaWa}; // doubleSbaWa
NEO::EncodeStateBaseAddress<GfxFamily>::encode(encodeStateBaseAddressArgs);
bool sbaTrackingEnabled = NEO::Debugger::isDebugEnabled(this->internalUsage) && device->getL0Debugger();

View File

@@ -14,9 +14,11 @@
#include "shared/source/command_stream/wait_status.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/debugger/debugger_l0.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/source/os_interface/os_context.h"
#include "level_zero/core/source/cmdqueue/cmdqueue_imp.h"
@@ -86,11 +88,14 @@ ze_result_t CommandQueueImp::initialize(bool copyOnly, bool isInternal) {
if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger()) {
device->getL0Debugger()->notifyCommandQueueCreated(device->getNEODevice());
}
auto &hwInfo = device->getHwInfo();
auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
this->stateComputeModeTracking = L0GfxCoreHelper::enableStateComputeModeTracking(rootDeviceEnvironment);
this->frontEndStateTracking = L0GfxCoreHelper::enableFrontEndStateTracking(rootDeviceEnvironment);
this->pipelineSelectStateTracking = L0GfxCoreHelper::enablePipelineSelectStateTracking(rootDeviceEnvironment);
this->stateBaseAddressTracking = L0GfxCoreHelper::enableStateBaseAddressTracking(rootDeviceEnvironment);
auto &productHelper = rootDeviceEnvironment.getHelper<NEO::ProductHelper>();
this->doubleSbaWa = productHelper.isAdditionalStateBaseAddressWARequired(hwInfo);
}
return returnValue;
}

View File

@@ -77,6 +77,7 @@ struct CommandQueue : _ze_command_queue_handle_t {
bool pipelineSelectStateTracking = false;
bool stateComputeModeTracking = false;
bool stateBaseAddressTracking = false;
bool doubleSbaWa = false;
};
using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, NEO::CommandStreamReceiver *csr,

View File

@@ -27,7 +27,6 @@ template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream, bool cachedMOCSAllowed) {
using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;
const auto &hwInfo = this->device->getHwInfo();
NEO::Device *neoDevice = device->getNEODevice();
bool isRcs = this->getCsr()->isRcs();
auto &rootDeviceEnvironment = neoDevice->getRootDeviceEnvironment();
@@ -59,7 +58,6 @@ void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool
nullptr, // ioh
nullptr, // ssh
neoDevice->getGmmHelper(), // gmmHelper
&hwInfo, // hwInfo
(device->getMOCS(cachedMOCSAllowed, false) >> 1), // statelessMocsIndex
NEO::MemoryCompressionState::NotApplicable, // memoryCompressionState
true, // setInstructionStateBaseAddress
@@ -69,7 +67,8 @@ void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool
false, // useGlobalAtomics
false, // areMultipleSubDevicesInContext
false, // overrideSurfaceStateBaseAddress
isDebuggerActive // isDebuggerActive
isDebuggerActive, // isDebuggerActive
this->doubleSbaWa // doubleSbaWa
};
NEO::StateBaseAddressHelper<GfxFamily>::programStateBaseAddressIntoCommandStream(stateBaseAddressHelperArgs, commandStream);

View File

@@ -24,7 +24,6 @@ template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream, bool cachedMOCSAllowed) {
using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;
NEO::Device *neoDevice = device->getNEODevice();
auto &hwInfo = neoDevice->getHardwareInfo();
uint32_t rootDeviceIndex = neoDevice->getRootDeviceIndex();
bool multiOsContextCapable = device->isImplicitScalingCapable();
@@ -54,7 +53,6 @@ void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool
nullptr, // ioh
nullptr, // ssh
neoDevice->getGmmHelper(), // gmmHelper
&hwInfo, // hwInfo
(device->getMOCS(cachedMOCSAllowed, false) >> 1), // statelessMocsIndex
NEO::MemoryCompressionState::NotApplicable, // memoryCompressionState
true, // setInstructionStateBaseAddress
@@ -64,7 +62,8 @@ void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool
false, // useGlobalAtomics
false, // areMultipleSubDevicesInContext
false, // overrideSurfaceStateBaseAddress
isDebuggerActive // isDebuggerActive
isDebuggerActive, // isDebuggerActive
this->doubleSbaWa // doubleSbaWa
};
NEO::StateBaseAddressHelper<GfxFamily>::programStateBaseAddressIntoCommandStream(stateBaseAddressHelperArgs, commandStream);
@@ -87,16 +86,12 @@ template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandQueueHw<gfxCoreFamily>::estimateStateBaseAddressCmdSize() {
using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;
using _3DSTATE_BINDING_TABLE_POOL_ALLOC = typename GfxFamily::_3DSTATE_BINDING_TABLE_POOL_ALLOC;
NEO::Device *neoDevice = device->getNEODevice();
auto &hwInfo = neoDevice->getHardwareInfo();
auto &productHelper = neoDevice->getProductHelper();
size_t size = 0;
if (NEO::ApiSpecificConfig::getBindlessConfiguration()) {
size += sizeof(STATE_BASE_ADDRESS) + NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier(false) + sizeof(_3DSTATE_BINDING_TABLE_POOL_ALLOC);
if (productHelper.isAdditionalStateBaseAddressWARequired(hwInfo)) {
if (this->doubleSbaWa) {
size += sizeof(STATE_BASE_ADDRESS);
}
}

View File

@@ -52,6 +52,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
using BaseClass::containsAnyKernel;
using BaseClass::containsCooperativeKernelsFlag;
using BaseClass::csr;
using BaseClass::doubleSbaWa;
using BaseClass::engineGroupType;
using BaseClass::estimateBufferSizeMultiTileBarrier;
using BaseClass::finalStreamState;
@@ -137,6 +138,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
using BaseClass::commandsToPatch;
using BaseClass::compactL3FlushEventPacket;
using BaseClass::csr;
using BaseClass::doubleSbaWa;
using BaseClass::finalStreamState;
using BaseClass::frontEndStateTracking;
using BaseClass::getDcFlushRequired;
@@ -175,6 +177,7 @@ struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp {
using BaseClass::commandContainer;
using BaseClass::commandListPreemptionMode;
using BaseClass::csr;
using BaseClass::doubleSbaWa;
using BaseClass::finalStreamState;
using BaseClass::frontEndStateTracking;
using BaseClass::getDcFlushRequired;

View File

@@ -33,6 +33,7 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp {
using BaseClass::synchronizeByPollingForTaskCount;
using BaseClass::taskCount;
using CommandQueue::activeSubDevices;
using CommandQueue::doubleSbaWa;
using CommandQueue::frontEndStateTracking;
using CommandQueue::internalUsage;
using CommandQueue::partitionCount;
@@ -67,6 +68,7 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
using BaseClass::prepareAndSubmitBatchBuffer;
using BaseClass::printfKernelContainer;
using L0::CommandQueue::activeSubDevices;
using L0::CommandQueue::doubleSbaWa;
using L0::CommandQueue::frontEndStateTracking;
using L0::CommandQueue::internalUsage;
using L0::CommandQueue::partitionCount;

View File

@@ -2376,7 +2376,10 @@ HWTEST2_F(CommandListCreate, givenNullEventWhenAppendEventAfterWalkerThenNothing
}
TEST_F(CommandListCreate, givenCreatedCommandListWhenGettingTrackingFlagsThenDefaultValuseIsHwSupported) {
auto &l0GfxCoreHelper = device->getNEODevice()->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
auto &l0GfxCoreHelper = rootDeviceEnvironment.getHelper<L0GfxCoreHelper>();
auto &productHelper = rootDeviceEnvironment.getHelper<NEO::ProductHelper>();
ze_result_t returnValue;
std::unique_ptr<L0::ult::CommandList> commandList(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
@@ -2393,6 +2396,9 @@ TEST_F(CommandListCreate, givenCreatedCommandListWhenGettingTrackingFlagsThenDef
bool expectedStateBaseAddressTracking = l0GfxCoreHelper.platformSupportsStateBaseAddressTracking();
EXPECT_EQ(expectedStateBaseAddressTracking, commandList->stateBaseAddressTracking);
bool expectedDoubleSbaWa = productHelper.isAdditionalStateBaseAddressWARequired(device->getHwInfo());
EXPECT_EQ(expectedDoubleSbaWa, commandList->doubleSbaWa);
}
} // namespace ult

View File

@@ -1974,7 +1974,11 @@ TEST_F(CommandQueueCreate, givenOverrideCmdQueueSyncModeToSynchronousWhenCommand
}
TEST_F(CommandQueueCreate, givenCreatedCommandQueueWhenGettingTrackingFlagsThenDefaultValuseIsHwSupported) {
auto &l0GfxCoreHelper = device->getNEODevice()->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
auto &l0GfxCoreHelper = rootDeviceEnvironment.getHelper<L0GfxCoreHelper>();
auto &productHelper = rootDeviceEnvironment.getHelper<NEO::ProductHelper>();
const ze_command_queue_desc_t desc{};
ze_result_t returnValue;
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
@@ -2000,6 +2004,9 @@ TEST_F(CommandQueueCreate, givenCreatedCommandQueueWhenGettingTrackingFlagsThenD
bool expectedStateBaseAddressTracking = l0GfxCoreHelper.platformSupportsStateBaseAddressTracking();
EXPECT_EQ(expectedStateBaseAddressTracking, commandQueue->stateBaseAddressTracking);
bool expectedDoubleSbaWa = productHelper.isAdditionalStateBaseAddressWARequired(device->getHwInfo());
EXPECT_EQ(expectedDoubleSbaWa, commandQueue->doubleSbaWa);
commandQueue->destroy();
}