Unify layout of command list class

Related-To: NEO-5055

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2023-03-06 20:33:50 +00:00
committed by Compute-Runtime-Automation
parent 3e116ea378
commit 49def723b7
47 changed files with 926 additions and 767 deletions

View File

@@ -10,6 +10,7 @@
#include "shared/source/command_container/cmdcontainer.h"
#include "shared/source/command_stream/preemption_mode.h"
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/helpers/cache_policy.h"
#include "shared/source/helpers/heap_base_address_model.h"
#include "shared/source/memory_manager/prefetch_manager.h"
#include "shared/source/unified_memory/unified_memory.h"
@@ -258,7 +259,7 @@ struct CommandList : _ze_command_list_handle_t {
virtual ze_result_t executeCommandListImmediate(bool performMigration) = 0;
virtual ze_result_t initialize(Device *device, NEO::EngineGroupType engineGroupType, ze_command_list_flags_t flags) = 0;
virtual ~CommandList();
NEO::CommandContainer commandContainer;
// Returns the current value of the containsStatelessUncachedResource flag.
bool getContainsStatelessUncachedResource() { return containsStatelessUncachedResource; }
std::map<const void *, NEO::GraphicsAllocation *> &getHostPtrMap() {
return hostPtrMap;
@@ -296,24 +297,41 @@ struct CommandList : _ze_command_list_handle_t {
return this->isTbxMode && !this->isSyncModeQueue;
}
ze_context_handle_t hContext = nullptr;
std::vector<Kernel *> printfKernelContainer;
CommandQueue *cmdQImmediate = nullptr;
NEO::CommandStreamReceiver *csr = nullptr;
Device *device = nullptr;
NEO::PreemptionMode commandListPreemptionMode = NEO::PreemptionMode::Initial;
unsigned long numThreads = 1u;
uint32_t cmdListType = CommandListType::TYPE_REGULAR;
uint32_t commandListPerThreadScratchSize = 0u;
uint32_t commandListPerThreadPrivateScratchSize = 0u;
uint32_t partitionCount = 1;
bool isFlushTaskSubmissionEnabled = false;
bool isSyncModeQueue = false;
bool isTbxMode = false;
bool commandListSLMEnabled = false;
bool requiresQueueUncachedMocs = false;
bool isBcsSplitNeeded = false;
bool immediateCmdListHeapSharing = false;
// Stores contextHandle in the hContext member of this command list.
void setCmdListContext(ze_context_handle_t contextHandle) {
this->hContext = contextHandle;
}
// Returns the context handle currently stored in hContext
// (nullptr by default until setCmdListContext is called).
ze_context_handle_t getCmdListContext() const {
return this->hContext;
}
// Returns the current value of the partitionCount member.
uint32_t getPartitionCount() const {
return this->partitionCount;
}
// Returns cmdListType as a raw uint32_t; callers compare it against
// CommandListType values such as TYPE_REGULAR or TYPE_IMMEDIATE.
uint32_t getCmdListType() const {
return this->cmdListType;
}
// Returns the current value of the requiresQueueUncachedMocs flag.
bool isRequiredQueueUncachedMocs() const {
return requiresQueueUncachedMocs;
}
// Returns the current value of the isFlushTaskSubmissionEnabled flag.
bool flushTaskSubmissionEnabled() const {
return isFlushTaskSubmissionEnabled;
}
// Returns the Device pointer stored in the device member (may be nullptr,
// which is the member's default value).
Device *getDevice() const {
return this->device;
}
// Returns a mutable reference to the commandContainer member.
NEO::CommandContainer &getCmdContainer() {
return this->commandContainer;
}
// Replaces the stored csr pointer with newCsr.
void setCsr(NEO::CommandStreamReceiver *newCsr) {
this->csr = newCsr;
}
protected:
NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize);
@@ -328,18 +346,50 @@ struct CommandList : _ze_command_list_handle_t {
std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
std::vector<NEO::GraphicsAllocation *> ownedPrivateAllocations;
std::vector<NEO::GraphicsAllocation *> patternAllocations;
CmdListReturnPoints returnPoints;
std::vector<Kernel *> printfKernelContainer;
NEO::CommandContainer commandContainer;
CmdListReturnPoints returnPoints;
NEO::StreamProperties requiredStreamState{};
NEO::StreamProperties finalStreamState{};
CommandsToPatch commandsToPatch{};
UnifiedMemoryControls unifiedMemoryControls;
NEO::PrefetchContext prefetchContext;
NEO::L1CachePolicy l1CachePolicyData{};
int64_t currentSurfaceStateBaseAddress = NEO::StreamProperty64::initValue;
int64_t currentDynamicStateBaseAddress = NEO::StreamProperty64::initValue;
int64_t currentIndirectObjectBaseAddress = NEO::StreamProperty64::initValue;
int64_t currentBindingTablePoolBaseAddress = NEO::StreamProperty64::initValue;
ze_context_handle_t hContext = nullptr;
CommandQueue *cmdQImmediate = nullptr;
NEO::CommandStreamReceiver *csr = nullptr;
Device *device = nullptr;
size_t cmdListCurrentStartOffset = 0;
unsigned long numThreads = 1u;
ze_command_list_flags_t flags = 0u;
NEO::PreemptionMode commandListPreemptionMode = NEO::PreemptionMode::Initial;
NEO::EngineGroupType engineGroupType;
NEO::HeapAddressModel cmdListHeapAddressModel = NEO::HeapAddressModel::PrivateHeaps;
uint32_t cmdListType = CommandListType::TYPE_REGULAR;
uint32_t commandListPerThreadScratchSize = 0u;
uint32_t commandListPerThreadPrivateScratchSize = 0u;
uint32_t partitionCount = 1;
uint32_t defaultMocsIndex = 0;
bool isFlushTaskSubmissionEnabled = false;
bool isSyncModeQueue = false;
bool isTbxMode = false;
bool commandListSLMEnabled = false;
bool requiresQueueUncachedMocs = false;
bool isBcsSplitNeeded = false;
bool immediateCmdListHeapSharing = false;
bool indirectAllocationsAllowed = false;
bool internalUsage = false;
bool containsCooperativeKernelsFlag = false;
@@ -353,6 +403,10 @@ struct CommandList : _ze_command_list_handle_t {
bool signalAllEventPackets = false;
bool stateBaseAddressTracking = false;
bool doubleSbaWa = false;
bool containsAnyKernel = false;
bool pipeControlMultiKernelEventSync = false;
bool compactL3FlushEventPacket = false;
bool dynamicHeapRequired = false;
};
using CommandListAllocatorFn = CommandList *(*)(uint32_t);

View File

@@ -8,7 +8,6 @@
#pragma once
#include "shared/source/command_stream/transfer_direction.h"
#include "shared/source/helpers/cache_policy.h"
#include "shared/source/helpers/hw_mapper.h"
#include "shared/source/helpers/pipe_control_args.h"
#include "shared/source/helpers/vec.h"
@@ -327,28 +326,6 @@ struct CommandListCoreFamily : CommandListImp {
static constexpr bool cmdListDefaultPipelineSelectModeSelected = true;
static constexpr bool cmdListDefaultMediaSamplerClockGate = false;
static constexpr bool cmdListDefaultGlobalAtomics = false;
NEO::L1CachePolicy l1CachePolicyData{};
int64_t currentSurfaceStateBaseAddress = NEO::StreamProperty64::initValue;
int64_t currentDynamicStateBaseAddress = NEO::StreamProperty64::initValue;
int64_t currentIndirectObjectBaseAddress = NEO::StreamProperty64::initValue;
int64_t currentBindingTablePoolBaseAddress = NEO::StreamProperty64::initValue;
size_t currentSurfaceStateSize = NEO::StreamPropertySizeT::initValue;
size_t currentDynamicStateSize = NEO::StreamPropertySizeT::initValue;
size_t currentIndirectObjectSize = NEO::StreamPropertySizeT::initValue;
size_t currentBindingTablePoolSize = NEO::StreamPropertySizeT::initValue;
size_t cmdListCurrentStartOffset = 0;
int32_t currentMocsState = NEO::StreamProperty::initValue;
uint32_t defaultMocsIndex = 0;
bool containsAnyKernel = false;
bool pipeControlMultiKernelEventSync = false;
bool compactL3FlushEventPacket = false;
bool dynamicHeapRequired = false;
};
template <PRODUCT_FAMILY gfxProductFamily>

View File

@@ -106,11 +106,6 @@ void CommandListCoreFamily<gfxCoreFamily>::postInitComputeSetup() {
currentDynamicStateBaseAddress = NEO::StreamProperty64::initValue;
currentIndirectObjectBaseAddress = NEO::StreamProperty64::initValue;
currentBindingTablePoolBaseAddress = NEO::StreamProperty64::initValue;
currentSurfaceStateSize = NEO::StreamPropertySizeT::initValue;
currentDynamicStateSize = NEO::StreamPropertySizeT::initValue;
currentIndirectObjectSize = NEO::StreamPropertySizeT::initValue;
currentBindingTablePoolSize = NEO::StreamPropertySizeT::initValue;
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -2441,12 +2436,17 @@ template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForRegularCommandLists(Kernel &kernel, bool isCooperative, const ze_group_count_t *threadGroupDimensions, bool isIndirect) {
using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE;
size_t currentSurfaceStateSize = NEO::StreamPropertySizeT::initValue;
size_t currentDynamicStateSize = NEO::StreamPropertySizeT::initValue;
size_t currentIndirectObjectSize = NEO::StreamPropertySizeT::initValue;
size_t currentBindingTablePoolSize = NEO::StreamPropertySizeT::initValue;
auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes;
KernelImp &kernelImp = static_cast<KernelImp &>(kernel);
currentMocsState = static_cast<int32_t>(device->getMOCS(!kernelImp.getKernelRequiresUncachedMocs(), false) >> 1);
int32_t currentMocsState = static_cast<int32_t>(device->getMOCS(!kernelImp.getKernelRequiresUncachedMocs(), false) >> 1);
bool checkSsh = false;
if (currentSurfaceStateBaseAddress == NEO::StreamProperty64::initValue || commandContainer.isHeapDirty(NEO::IndirectHeap::Type::SURFACE_STATE)) {
auto ssh = commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::SURFACE_STATE);

View File

@@ -271,7 +271,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::validateCommandListsParams(
return ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE;
}
if (this->activeSubDevices < commandList->partitionCount) {
if (this->activeSubDevices < commandList->getPartitionCount()) {
return ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE;
}
}
@@ -443,7 +443,7 @@ CommandQueueHw<gfxCoreFamily>::CommandListExecutionContext::CommandListExecution
}
// If the Command List has commands that require uncached MOCS, then any changes to the commands in the queue requires the uncached MOCS
if (commandList->requiresQueueUncachedMocs && this->cachedMOCSAllowed == true) {
if (commandList->isRequiredQueueUncachedMocs() && this->cachedMOCSAllowed == true) {
this->cachedMOCSAllowed = false;
}
@@ -530,26 +530,27 @@ void CommandQueueHw<gfxCoreFamily>::setupCmdListsAndContextParams(
for (auto i = 0u; i < numCommandLists; i++) {
auto commandList = CommandList::fromHandle(phCommandLists[i]);
commandList->setCsr(this->csr);
commandList->csr = this->csr;
auto &commandContainer = commandList->getCmdContainer();
ctx.containsAnyRegularCmdList |= commandList->cmdListType == CommandList::CommandListType::TYPE_REGULAR;
ctx.containsAnyRegularCmdList |= commandList->getCmdListType() == CommandList::CommandListType::TYPE_REGULAR;
if (!isCopyOnlyCommandQueue) {
ctx.perThreadScratchSpaceSize = std::max(ctx.perThreadScratchSpaceSize, commandList->getCommandListPerThreadScratchSize());
ctx.perThreadPrivateScratchSize = std::max(ctx.perThreadPrivateScratchSize, commandList->getCommandListPerThreadPrivateScratchSize());
if (commandList->getCommandListPerThreadScratchSize() != 0 || commandList->getCommandListPerThreadPrivateScratchSize() != 0) {
if (commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE) != nullptr) {
heapContainer.push_back(commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getGraphicsAllocation());
if (commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE) != nullptr) {
heapContainer.push_back(commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getGraphicsAllocation());
}
for (auto element : commandList->commandContainer.sshAllocations) {
for (auto element : commandContainer.sshAllocations) {
heapContainer.push_back(element);
}
}
}
this->partitionCount = std::max(this->partitionCount, commandList->partitionCount);
makeResidentAndMigrate(ctx.isMigrationRequested, commandList->commandContainer.getResidencyContainer());
this->partitionCount = std::max(this->partitionCount, commandList->getPartitionCount());
makeResidentAndMigrate(ctx.isMigrationRequested, commandContainer.getResidencyContainer());
}
ctx.isDispatchTaskCountPostSyncRequired = isDispatchTaskCountPostSyncRequired(hFence, ctx.containsAnyRegularCmdList);
@@ -568,7 +569,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeInitial(
for (auto i = 0u; i < numCommandLists; i++) {
auto commandList = CommandList::fromHandle(phCommandLists[i]);
linearStreamSizeEstimate += commandList->commandContainer.getCmdBufferAllocations().size();
linearStreamSizeEstimate += commandList->getCmdContainer().getCmdBufferAllocations().size();
}
linearStreamSizeEstimate *= NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::getBatchBufferStartSize();
linearStreamSizeEstimate += this->csr->getCmdsSizeForHardwareContext();
@@ -746,7 +747,7 @@ void CommandQueueHw<gfxCoreFamily>::programStateBaseAddressWithGsbaIfDirty(
if (!ctx.gsbaStateDirty) {
return;
}
auto indirectHeap = CommandList::fromHandle(hCommandList)->commandContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT);
auto indirectHeap = CommandList::fromHandle(hCommandList)->getCmdContainer().getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT);
auto scratchSpaceController = this->csr->getScratchSpaceController();
programStateBaseAddress(scratchSpaceController->calculateNewGSH(),
indirectHeap->getGraphicsAllocation()->isAllocatedInLocalMemoryPool(),
@@ -873,9 +874,11 @@ void CommandQueueHw<gfxCoreFamily>::programOneCmdListBatchBufferStart(CommandLis
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programOneCmdListBatchBufferStart(CommandList *commandList, NEO::LinearStream &cmdStream, CommandListExecutionContext &ctx) {
auto &cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations();
auto &commandContainer = commandList->getCmdContainer();
auto &cmdBufferAllocations = commandContainer.getCmdBufferAllocations();
auto cmdBufferCount = cmdBufferAllocations.size();
bool isCommandListImmediate = (commandList->cmdListType == CommandList::CommandListType::TYPE_IMMEDIATE) ? true : false;
bool isCommandListImmediate = (commandList->getCmdListType() == CommandList::CommandListType::TYPE_IMMEDIATE) ? true : false;
auto &returnPoints = commandList->getReturnPoints();
uint32_t returnPointsSize = commandList->getReturnPointsSize();
@@ -885,7 +888,7 @@ void CommandQueueHw<gfxCoreFamily>::programOneCmdListBatchBufferStart(CommandLis
auto allocation = cmdBufferAllocations[iter];
uint64_t startOffset = allocation->getGpuAddress();
if (isCommandListImmediate && (iter == (cmdBufferCount - 1))) {
startOffset = ptrOffset(allocation->getGpuAddress(), commandList->commandContainer.currentLinearStreamStartOffset);
startOffset = ptrOffset(allocation->getGpuAddress(), commandContainer.currentLinearStreamStartOffset);
}
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&cmdStream, startOffset, true, false, false);
if (returnPointsSize > 0) {
@@ -916,7 +919,7 @@ void CommandQueueHw<gfxCoreFamily>::programOneCmdListBatchBufferStart(CommandLis
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::mergeOneCmdListPipelinedState(CommandList *commandList) {
bool isCommandListImmediate = (commandList->cmdListType == CommandList::CommandListType::TYPE_IMMEDIATE) ? true : false;
bool isCommandListImmediate = (commandList->getCmdListType() == CommandList::CommandListType::TYPE_IMMEDIATE) ? true : false;
auto commandListImp = static_cast<CommandListImp *>(commandList);
if (!isCommandListImmediate && commandListImp->getLogicalStateHelper()) {
this->csr->getLogicalStateHelper()->mergePipelinedState(*commandListImp->getLogicalStateHelper());
@@ -952,8 +955,8 @@ void CommandQueueHw<gfxCoreFamily>::prefetchMemoryToDeviceAssociatedWithCmdList(
auto prefetchManager = this->device->getDriverHandle()->getMemoryManager()->getPrefetchManager();
prefetchManager->migrateAllocationsToGpu(commandList->getPrefetchContext(),
*this->device->getDriverHandle()->getSvmAllocsManager(),
*commandList->device->getNEODevice(),
*commandList->csr);
*this->device->getNEODevice(),
*this->csr);
}
}
@@ -1229,7 +1232,7 @@ void CommandQueueHw<gfxCoreFamily>::programRequiredStateBaseAddressForCommandLis
csrState.stateBaseAddress.setProperties(cmdListRequired.stateBaseAddress);
if (ctx.gsbaStateDirty || csrState.stateBaseAddress.isDirty()) {
auto indirectHeap = commandList->commandContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT);
auto indirectHeap = commandList->getCmdContainer().getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT);
auto scratchSpaceController = this->csr->getScratchSpaceController();
programStateBaseAddress(scratchSpaceController->calculateNewGSH(),
indirectHeap->getGraphicsAllocation()->isAllocatedInLocalMemoryPool(),
@@ -1246,17 +1249,18 @@ void CommandQueueHw<gfxCoreFamily>::programRequiredStateBaseAddressForCommandLis
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::updateBaseAddressState(CommandList *lastCommandList) {
auto csrHw = static_cast<NEO::CommandStreamReceiverHw<GfxFamily> *>(csr);
auto dsh = lastCommandList->commandContainer.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE);
auto &commandContainer = lastCommandList->getCmdContainer();
auto dsh = commandContainer.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE);
if (dsh != nullptr) {
csrHw->getDshState().updateAndCheck(dsh);
}
auto ssh = lastCommandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE);
auto ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE);
if (ssh != nullptr) {
csrHw->getSshState().updateAndCheck(ssh);
}
auto ioh = lastCommandList->commandContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT);
auto ioh = commandContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT);
csrHw->getIohState().updateAndCheck(ioh);
}

View File

@@ -751,7 +751,7 @@ ze_result_t ContextImp::createCommandList(ze_device_handle_t hDevice,
ze_command_list_handle_t *commandList) {
auto ret = L0::Device::fromHandle(hDevice)->createCommandList(desc, commandList);
if (*commandList) {
L0::CommandList::fromHandle(*commandList)->hContext = this->toHandle();
L0::CommandList::fromHandle(*commandList)->setCmdListContext(this->toHandle());
}
return ret;
}
@@ -761,7 +761,7 @@ ze_result_t ContextImp::createCommandListImmediate(ze_device_handle_t hDevice,
ze_command_list_handle_t *commandList) {
auto ret = L0::Device::fromHandle(hDevice)->createCommandListImmediate(desc, commandList);
if (*commandList) {
L0::CommandList::fromHandle(*commandList)->hContext = this->toHandle();
L0::CommandList::fromHandle(*commandList)->setCmdListContext(this->toHandle());
}
return ret;
}

View File

@@ -76,7 +76,7 @@ struct BcsSplit {
std::function<ze_result_t(T, K, size_t, ze_event_handle_t)> appendCall) {
ze_result_t result = ZE_RESULT_SUCCESS;
auto markerEventIndex = this->events.obtainForSplit(Context::fromHandle(cmdList->hContext), MemoryConstants::pageSize64k / sizeof(typename CommandListCoreFamilyImmediate<gfxCoreFamily>::GfxFamily::TimestampPacketType));
auto markerEventIndex = this->events.obtainForSplit(Context::fromHandle(cmdList->getCmdListContext()), MemoryConstants::pageSize64k / sizeof(typename CommandListCoreFamilyImmediate<gfxCoreFamily>::GfxFamily::TimestampPacketType));
auto barrierRequired = cmdList->isBarrierRequired();
if (barrierRequired) {
@@ -108,7 +108,7 @@ struct BcsSplit {
auto eventHandle = this->events.subcopy[subcopyEventIndex + i]->toHandle();
result = appendCall(localDstPtr, localSrcPtr, localSize, eventHandle);
if (cmdList->isFlushTaskSubmissionEnabled) {
if (cmdList->flushTaskSubmissionEnabled()) {
cmdList->executeCommandListImmediateWithFlushTaskImpl(performMigration, false, hasRelaxedOrderingDependencies, cmdQsForSplit[i]);
} else {
cmdList->executeCommandListImmediateImpl(performMigration, cmdQsForSplit[i]);