Set up partition registers once per context

Related-To: NEO-6262

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2021-11-17 19:51:43 +00:00
committed by Compute-Runtime-Automation
parent c6c27ed328
commit f56773d166
17 changed files with 173 additions and 165 deletions

View File

@@ -54,6 +54,9 @@ ze_result_t CommandQueueImp::initialize(bool copyOnly, bool isInternal) {
isCopyOnlyCommandQueue = copyOnly;
preemptionCmdSyncProgramming = getPreemptionCmdProgramming();
activeSubDevices = static_cast<uint32_t>(csr->getOsContext().getDeviceBitfield().count());
if (!isInternal) {
partitionCount = csr->getActivePartitions();
}
}
return returnValue;
}

View File

@@ -52,9 +52,6 @@ struct CommandQueueHw : public CommandQueueImp {
bool getPreemptionCmdProgramming() override;
void patchCommands(CommandList &commandList, uint64_t scratchAddress);
size_t getPartitionProgrammingSize();
void programPartitionConfiguration(NEO::LinearStream &stream);
};
} // namespace L0

View File

@@ -149,7 +149,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
}
bool directSubmissionEnabled = isCopyOnlyCommandQueue ? csr->isBlitterDirectSubmissionEnabled() : csr->isDirectSubmissionEnabled();
partitionCount = csr->getActivePartitions();
bool programActivePartitionConfig = csr->isProgramActivePartitionConfigRequired();
L0::Fence *fence = nullptr;
@@ -215,6 +215,11 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
linearStreamSizeEstimate += sizeof(MI_BATCH_BUFFER_END);
}
auto csrHw = reinterpret_cast<NEO::CommandStreamReceiverHw<GfxFamily> *>(csr);
if (programActivePartitionConfig) {
linearStreamSizeEstimate += csrHw->getCmdSizeForActivePartitionConfig();
}
auto &hwInfo = device->getHwInfo();
if (hFence) {
fence = Fence::fromHandle(hFence);
@@ -269,9 +274,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
}
linearStreamSizeEstimate += isCopyOnlyCommandQueue ? NEO::EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite() : NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo);
if (partitionCount > 1) {
linearStreamSizeEstimate += getPartitionProgrammingSize();
}
size_t alignedSize = alignUp<size_t>(linearStreamSizeEstimate, minCmdBufferPtrAlign);
size_t padding = alignedSize - linearStreamSizeEstimate;
@@ -282,6 +284,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
if (globalFenceAllocation) {
csr->makeResident(*globalFenceAllocation);
}
const auto workPartitionAllocation = csr->getWorkPartitionAllocation();
if (workPartitionAllocation) {
csr->makeResident(*workPartitionAllocation);
@@ -352,6 +355,10 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
}
}
if (programActivePartitionConfig) {
csrHw->programActivePartitionConfig(child);
}
for (auto i = 0u; i < numCommandLists; ++i) {
auto commandList = CommandList::fromHandle(phCommandLists[i]);
auto cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations();
@@ -419,10 +426,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
commandQueuePreemptionMode = statePreemption;
if (partitionCount > 1) {
programPartitionConfiguration(child);
}
if (hFence) {
csr->makeResident(fence->getAllocation());
if (isCopyOnlyCommandQueue) {

View File

@@ -123,13 +123,4 @@ void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint
UNRECOVERABLE_IF(!commandsToPatch.empty());
}
// Reports the command-stream size needed for partition programming.
// This variant always returns 0; it sits next to an empty
// programPartitionConfiguration() definition in the same hunk.
// NOTE(review): the hunk header (-123,13 +123,4) suggests this definition is
// on the removed side of the commit - confirm against the raw patch.
template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandQueueHw<gfxCoreFamily>::getPartitionProgrammingSize() {
return 0;
}
// Partition configuration programming hook; this variant is a no-op and
// writes nothing to `stream`.
// NOTE(review): the hunk header (-123,13 +123,4) suggests this definition is
// on the removed side of the commit - confirm against the raw patch.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programPartitionConfiguration(NEO::LinearStream &stream) {
}
} // namespace L0

View File

@@ -153,20 +153,4 @@ void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint
}
}
// Reports the command-stream size needed for partition programming.
// Resolves the NEO GfxFamily for this gfxCoreFamily through GfxFamilyMapper
// and forwards to ImplicitScalingDispatch<GfxFamily>::getRegisterConfigurationSize().
// NOTE(review): the hunk header (-153,20 +153,4) suggests this definition is
// on the removed side of the commit - confirm against the raw patch.
template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandQueueHw<gfxCoreFamily>::getPartitionProgrammingSize() {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
return NEO::ImplicitScalingDispatch<GfxFamily>::getRegisterConfigurationSize();
}
// Emits the partition register configuration into `stream`.
// Reads the work-partition allocation GPU address from the queue's csr and
// passes it, together with CommonConstants::partitionAddressOffset, to
// ImplicitScalingDispatch<GfxFamily>::dispatchRegisterConfiguration().
// NOTE(review): the hunk header (-153,20 +153,4) suggests this definition is
// on the removed side of the commit - confirm against the raw patch.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programPartitionConfiguration(NEO::LinearStream &stream) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
// Address of the work-partition allocation as reported by the command stream receiver.
uint64_t workPartitionAddress = csr->getWorkPartitionAllocationGpuAddress();
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchRegisterConfiguration(stream,
workPartitionAddress,
CommonConstants::partitionAddressOffset);
}
} // namespace L0