feature: full sync dispatch mode initialization path

Related-To: NEO-8171

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2024-03-27 15:15:25 +00:00
committed by Compute-Runtime-Automation
parent b109094e4b
commit 50908a0809
3 changed files with 164 additions and 14 deletions

View File

@@ -358,6 +358,7 @@ struct CommandListCoreFamily : public CommandListImp {
// Returns the compile-time GfxFamily::isQwordInOrderCounter trait of the current hardware family.
bool isQwordInOrderCounter() const { return GfxFamily::isQwordInOrderCounter; }
// NOTE(review): definition not visible here; judging by the name, reports whether in-order signaling
// for `event` has to be programmed outside of the walker command - confirm against the implementation.
bool isInOrderNonWalkerSignalingRequired(const Event *event) const;
// NOTE(review): definition not visible here; presumably true when this command list has pending
// in-order dependencies to wait on - confirm against the implementation.
bool hasInOrderDependencies() const;
// Appends the command sequence that initializes dispatch in NEO::SynchronizedDispatchMode::full
// (token acquisition by the primary tile, wait-for-token on the secondary tiles) to the command stream.
void appendFullSynchronizedDispatchInit();
// NOTE(review): definition not visible here; presumably registers cmd1/cmd2 for later patching with
// counterValue and returns an index/handle of the registered entry - confirm against the implementation.
size_t addCmdForPatching(std::shared_ptr<NEO::InOrderExecInfo> *externalInOrderExecInfo, void *cmd1, void *cmd2, uint64_t counterValue, NEO::InOrderPatchCommandHelpers::PatchCmdType patchCmdType);
// NOTE(review): definition not visible here; presumably the step by which the in-order counter
// advances per signal - confirm against the implementation.
uint64_t getInOrderIncrementValue() const;

View File

@@ -3955,7 +3955,74 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSynchronizedDispatchInitializat
NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(), syncAlloc->getGpuAddress() + sizeof(uint32_t), 0u,
GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD,
false, false, false, true, nullptr);
} else if (this->synchronizedDispatchMode == NEO::SynchronizedDispatchMode::full) {
appendFullSynchronizedDispatchInit();
}
}
// Programs the initialization sequence for full synchronized dispatch mode.
//
// Layout of the emitted commands (in stream order):
//   1. Conditional jump over the primary-tile section, taken when the value read from the
//      work partition allocation is not 0 (i.e. on a secondary tile). Space is reserved first
//      and encoded once the jump target is known.
//   2. Primary-tile section: an acquire loop made of an 8-byte compare-and-write atomic on the
//      sync dispatch token, a conditional jump to the end section when the token's queue-id
//      dword equals this queue's id, a semaphore wait for the queue-id dword to become 0, and
//      an unconditional jump back to the start of the loop.
//   3. Secondary-tile section: semaphore wait until the token's queue-id dword equals this
//      queue's id.
//   4. End section: predicate disable.
//
// The 64-bit token value written on acquisition is (queueId << 32) + partitionCount, where
// queueId is syncDispatchQueueId + 1. The queue-id dword is addressed at token + sizeof(uint32_t).
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendFullSynchronizedDispatchInit() {
    using MI_ATOMIC = typename GfxFamily::MI_ATOMIC;
    using ATOMIC_OPCODES = typename MI_ATOMIC::ATOMIC_OPCODES;
    using DATA_SIZE = typename MI_ATOMIC::DATA_SIZE;
    using BbStartEncoder = NEO::EncodeBatchBufferStartOrEnd<GfxFamily>;

    constexpr size_t conditionalJumpSize = BbStartEncoder::getCmdSizeConditionalDataMemBatchBufferStart(false);

    const uint32_t queueId = this->syncDispatchQueueId + 1;
    const uint64_t acquiredTokenValue = (static_cast<uint64_t>(queueId) << 32) + this->partitionCount;

    auto tokenGpuVa = device->getSyncDispatchTokenAllocation()->getGpuAddress();
    auto workPartitionGpuVa = device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocation()->getGpuAddress();
    auto stream = commandContainer.getCommandStream();

    // Address of the dword that follows the first dword of the token (carries the queue id).
    const auto tokenQueueIdGpuVa = tokenGpuVa + sizeof(uint32_t);

    // Small emitters for the commands that are programmed more than once below.
    auto emitPredicateDisable = [&]() {
        NEO::EncodeMiPredicate<GfxFamily>::encode(*stream, NEO::MiPredicateType::disable);
    };
    auto emitWaitForQueueIdField = [&](uint32_t expectedValue) {
        NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*stream, tokenQueueIdGpuVa, expectedValue,
                                                                   GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD,
                                                                   false, false, false, true, nullptr);
    };

    // --- Secondary-tile dispatch: placeholder for "jump to Secondary Tile section" (patched below) ---
    NEO::LinearStream secondaryTileJumpStream(stream->getSpace(conditionalJumpSize), conditionalJumpSize);

    // --- Primary Tile section ---
    // Start of the acquire loop; the unconditional jump at the end of this section returns here.
    const uint64_t acquireLoopStartVa = stream->getCurrentGpuAddressPosition();

    emitPredicateDisable();

    // Attempt to take the token.
    NEO::EncodeAtomic<GfxFamily>::programMiAtomic(*stream, tokenGpuVa, ATOMIC_OPCODES::ATOMIC_8B_CMP_WR,
                                                  DATA_SIZE::DATA_SIZE_QWORD, 1, 1, 0, acquiredTokenValue);

    // Placeholder for "token acquired -> jump to end" (patched below, once the end address is known).
    NEO::LinearStream tokenAcquiredJumpStream;
    tokenAcquiredJumpStream.replaceBuffer(stream->getSpace(conditionalJumpSize), conditionalJumpSize);

    // Not acquired: wait on the semaphore (potential context switch point), then retry.
    emitWaitForQueueIdField(0u);
    BbStartEncoder::programBatchBufferStart(stream, acquireLoopStartVa, false, false, false);

    // --- Patch: skip of the Primary Tile section lands here, at the Secondary Tile section ---
    const auto secondaryTileSectionVa = stream->getCurrentGpuAddressPosition();
    BbStartEncoder::programConditionalDataMemBatchBufferStart(secondaryTileJumpStream, secondaryTileSectionVa, workPartitionGpuVa, 0,
                                                              NEO::CompareOperation::notEqual, false, false);

    // --- Secondary Tile section ---
    emitPredicateDisable();

    // Block until the Primary Tile has acquired the token for this queue.
    emitWaitForQueueIdField(queueId);

    // --- Patch: Primary Tile "token acquired" jump lands here, at the End section ---
    const auto endSectionVa = stream->getCurrentGpuAddressPosition();
    BbStartEncoder::programConditionalDataMemBatchBufferStart(tokenAcquiredJumpStream, endSectionVa, tokenQueueIdGpuVa, queueId,
                                                              NEO::CompareOperation::equal, false, false);

    // --- End section ---
    emitPredicateDisable();
}
} // namespace L0