feature: full sync dispatch mode initialization path
Related-To: NEO-8171
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
parent
b109094e4b
commit
50908a0809
|
@ -358,6 +358,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
|||
bool isQwordInOrderCounter() const { return GfxFamily::isQwordInOrderCounter; }
|
||||
bool isInOrderNonWalkerSignalingRequired(const Event *event) const;
|
||||
bool hasInOrderDependencies() const;
|
||||
void appendFullSynchronizedDispatchInit();
|
||||
|
||||
size_t addCmdForPatching(std::shared_ptr<NEO::InOrderExecInfo> *externalInOrderExecInfo, void *cmd1, void *cmd2, uint64_t counterValue, NEO::InOrderPatchCommandHelpers::PatchCmdType patchCmdType);
|
||||
uint64_t getInOrderIncrementValue() const;
|
||||
|
|
|
@ -3955,7 +3955,74 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSynchronizedDispatchInitializat
|
|||
NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(), syncAlloc->getGpuAddress() + sizeof(uint32_t), 0u,
|
||||
GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD,
|
||||
false, false, false, true, nullptr);
|
||||
} else if (this->synchronizedDispatchMode == NEO::SynchronizedDispatchMode::full) {
|
||||
appendFullSynchronizedDispatchInit();
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
// Emits the "full" synchronized-dispatch initialization sequence into the command stream.
// One tile (the Primary Tile) spins on an 8-byte compare-and-write atomic until it acquires
// the sync-dispatch token; the remaining tiles (Secondary Tiles) wait on a semaphore until
// the Primary Tile has acquired it. Two conditional BB_START commands are reserved up front
// and patched afterwards, once the GPU addresses of their jump targets are known.
// NOTE(review): tile classification appears to be driven by the work-partition allocation
// value (0 == primary) — confirm against the work-partition programming elsewhere.
void CommandListCoreFamily<gfxCoreFamily>::appendFullSynchronizedDispatchInit() {
    using MI_ATOMIC = typename GfxFamily::MI_ATOMIC;
    using ATOMIC_OPCODES = typename MI_ATOMIC::ATOMIC_OPCODES;
    using DATA_SIZE = typename MI_ATOMIC::DATA_SIZE;

    // Size of one conditional data-mem BB_START; used to reserve patchable slots below.
    constexpr size_t conditionalDataMemBbStartSize = NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataMemBatchBufferStart(false);

    // Token layout: queue id (biased by +1 so 0 means "free") in the high dword,
    // participating-partition count in the low dword.
    const uint32_t queueId = this->syncDispatchQueueId + 1;
    const uint64_t queueIdToken = static_cast<uint64_t>(queueId) << 32;
    const uint64_t tokenInitialValue = queueIdToken + this->partitionCount;

    auto syncAllocationGpuVa = device->getSyncDispatchTokenAllocation()->getGpuAddress();
    auto workPartitionAllocationGpuVa = device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocation()->getGpuAddress();
    auto cmdStream = commandContainer.getCommandStream();

    // If Secondary Tile, then jump to Secondary Tile section.
    // Reserve space for now. Will be patched later, once the Secondary Tile
    // section's GPU address is known.
    NEO::LinearStream skipPrimaryTileSectionCmdStream(cmdStream->getSpace(conditionalDataMemBbStartSize), conditionalDataMemBbStartSize);

    // If token acquired, jump to the end. Buffer is assigned inside the Primary Tile
    // section and patched after the End section address is known.
    NEO::LinearStream jumpToEndSectionFromPrimaryTile;

    // Primary Tile section
    {
        // Try to acquire the token: 8-byte CMP_WR atomic swaps in tokenInitialValue
        // only when the current token value matches the expected compare operand.
        // Capture this command's GPU VA so the loop below can branch back here.
        uint64_t acquireTokenCmdBufferVa = cmdStream->getCurrentGpuAddressPosition();
        NEO::EncodeMiPredicate<GfxFamily>::encode(*cmdStream, NEO::MiPredicateType::disable);
        NEO::EncodeAtomic<GfxFamily>::programMiAtomic(*cmdStream, syncAllocationGpuVa, ATOMIC_OPCODES::ATOMIC_8B_CMP_WR,
                                                      DATA_SIZE::DATA_SIZE_QWORD, 1, 1, 0, tokenInitialValue);

        // If token acquired, jump to the end.
        // Reserve space for now. Will be patched later.
        jumpToEndSectionFromPrimaryTile.replaceBuffer(cmdStream->getSpace(conditionalDataMemBbStartSize), conditionalDataMemBbStartSize);

        // Semaphore for potential switch: wait on the token's high dword (queue id slot)
        // before retrying the acquisition.
        NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*cmdStream, syncAllocationGpuVa + sizeof(uint32_t), 0u,
                                                                   GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD,
                                                                   false, false, false, true, nullptr);

        // Loop back to acquire again.
        NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(cmdStream, acquireTokenCmdBufferVa, false, false, false);
    }

    // Patch Primary Tile section skip (to Secondary Tile section): taken when the
    // work-partition value != 0, i.e. this is not the Primary Tile.
    NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(skipPrimaryTileSectionCmdStream, cmdStream->getCurrentGpuAddressPosition(), workPartitionAllocationGpuVa, 0,
                                                                                           NEO::CompareOperation::notEqual, false, false);

    // Secondary Tile section
    {
        NEO::EncodeMiPredicate<GfxFamily>::encode(*cmdStream, NEO::MiPredicateType::disable);

        // Wait for token acquisition by Primary Tile: the token's high dword becomes
        // this queue's id once the atomic above succeeds.
        NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*cmdStream, syncAllocationGpuVa + sizeof(uint32_t), queueId,
                                                                   GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD,
                                                                   false, false, false, true, nullptr);
    }

    // Patch Primary Tile section jump to end: taken when the token's queue-id dword
    // already equals this queue's id (acquisition succeeded).
    NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(jumpToEndSectionFromPrimaryTile, cmdStream->getCurrentGpuAddressPosition(), syncAllocationGpuVa + sizeof(uint32_t), queueId,
                                                                                           NEO::CompareOperation::equal, false, false);

    // End section: clear predication before subsequent commands.
    NEO::EncodeMiPredicate<GfxFamily>::encode(*cmdStream, NEO::MiPredicateType::disable);
}
|
||||
} // namespace L0
|
||||
|
|
|
@ -6131,34 +6131,116 @@ HWTEST2_F(MultiTileSynchronizedDispatchTests, givenLimitedSyncDispatchWhenAppend
|
|||
context->freeMem(alloc);
|
||||
}
|
||||
|
||||
// Verifies that an append on a full-sync-dispatch command list emits the complete token
// acquisition sequence: optional dependency semaphores, the patched primary-tile skip,
// the Primary Tile section (predicate disable, 8B CMP_WR atomic, patched jump-to-end,
// switch semaphore, loop-back BB_START), the Secondary Tile section (predicate disable,
// wait for the primary's token), and the End section predicate disable.
HWTEST2_F(MultiTileSynchronizedDispatchTests, givenFullSyncDispatchWhenAppendingThenProgramTokenAcquire, IsAtLeastXeHpcCore) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using MI_SET_PREDICATE = typename FamilyType::MI_SET_PREDICATE;
    using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;

    auto immCmdList = createMultiTileImmCmdList<gfxCoreFamily>();
    immCmdList->synchronizedDispatchMode = NEO::SynchronizedDispatchMode::full;
    immCmdList->syncDispatchQueueId = 0x1234;

    // Expected token: (queueId + 1) in the high dword, partition count in the low dword —
    // must match appendFullSynchronizedDispatchInit's encoding.
    const uint32_t queueId = immCmdList->syncDispatchQueueId + 1;
    const uint64_t queueIdToken = static_cast<uint64_t>(queueId) << 32;
    const uint64_t tokenInitialValue = queueIdToken + partitionCount;

    auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
    size_t offset = cmdStream->getUsed();

    // Parses everything appended after `offset` and walks the expected command sequence.
    // Returns false on the first mismatch so the caller's EXPECT_TRUE pinpoints the run.
    auto verifyTokenAcquisition = [&](bool hasDependencySemaphore) {
        GenCmdList cmdList;
        EXPECT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), offset), (cmdStream->getUsed() - offset)));
        if (::testing::Test::HasFailure()) {
            return false;
        }

        auto itor = cmdList.begin();
        if (hasDependencySemaphore) {
            // Skip one in-order dependency semaphore per partition before the init sequence.
            for (uint32_t i = 0; i < partitionCount; i++) {
                itor = find<MI_SEMAPHORE_WAIT *>(itor, cmdList.end());
                EXPECT_NE(cmdList.end(), itor);
                itor++;
            }
        }

        // Primary-secondary path selection (conditional BB_START, patched after encoding).
        void *primaryTileSectionSkipVa = *itor;

        // Primary Tile section
        auto miPredicate = reinterpret_cast<MI_SET_PREDICATE *>(
            ptrOffset(primaryTileSectionSkipVa, NEO::EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(false)));
        // The retry loop branches back to this predicate-disable command.
        void *loopBackToAcquireVa = miPredicate;

        if (!RelaxedOrderingCommandsHelper::verifyMiPredicate<FamilyType>(miPredicate, MiPredicateType::disable)) {
            return false;
        }

        // Token-acquire atomic: 8-byte compare-and-write of the initial token value.
        auto miAtomic = reinterpret_cast<MI_ATOMIC *>(++miPredicate);
        EXPECT_EQ(MI_ATOMIC::DWORD_LENGTH::DWORD_LENGTH_INLINE_DATA_1, miAtomic->getDwordLength());
        EXPECT_EQ(1u, miAtomic->getInlineData());

        // Compare operand (operand 1) is zero — token must be free.
        EXPECT_EQ(0u, miAtomic->getOperand1DataDword0());
        EXPECT_EQ(0u, miAtomic->getOperand1DataDword1());

        // Write operand (operand 2) is the new token value.
        EXPECT_EQ(getLowPart(tokenInitialValue), miAtomic->getOperand2DataDword0());
        EXPECT_EQ(getHighPart(tokenInitialValue), miAtomic->getOperand2DataDword1());

        EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_8B_CMP_WR, miAtomic->getAtomicOpcode());
        EXPECT_EQ(MI_ATOMIC::DATA_SIZE::DATA_SIZE_QWORD, miAtomic->getDataSize());

        if (::testing::Test::HasFailure()) {
            return false;
        }

        // Patched jump-to-end slot (conditional BB_START) immediately follows the atomic.
        void *jumpToEndSectionFromPrimaryTile = ++miAtomic;

        // Switch semaphore: wait on the token's queue-id dword being free (0).
        auto semaphore = reinterpret_cast<MI_SEMAPHORE_WAIT *>(
            ptrOffset(jumpToEndSectionFromPrimaryTile, NEO::EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(false)));

        EXPECT_EQ(0u, semaphore->getSemaphoreDataDword());
        uint64_t syncAllocGpuVa = device->getSyncDispatchTokenAllocation()->getGpuAddress();
        EXPECT_EQ(syncAllocGpuVa + sizeof(uint32_t), semaphore->getSemaphoreGraphicsAddress());
        EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphore->getCompareOperation());

        if (::testing::Test::HasFailure()) {
            return false;
        }

        // Loop back to the acquire attempt.
        auto bbStart = reinterpret_cast<MI_BATCH_BUFFER_START *>(++semaphore);
        EXPECT_EQ(castToUint64(loopBackToAcquireVa), bbStart->getBatchBufferStartAddress());

        if (::testing::Test::HasFailure()) {
            return false;
        }

        uint64_t workPartitionGpuVa = device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocation()->getGpuAddress();

        // Secondary Tile section
        miPredicate = reinterpret_cast<MI_SET_PREDICATE *>(++bbStart);
        if (!RelaxedOrderingCommandsHelper::verifyMiPredicate<FamilyType>(miPredicate, MiPredicateType::disable)) {
            return false;
        }

        // Primary Tile section skip - patching: taken when work-partition value != 0.
        if (!RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(primaryTileSectionSkipVa, castToUint64(miPredicate), workPartitionGpuVa, 0, NEO::CompareOperation::notEqual, false, false)) {
            return false;
        }

        // Secondary tiles wait until the primary has written its queue id into the token.
        semaphore = reinterpret_cast<MI_SEMAPHORE_WAIT *>(++miPredicate);
        EXPECT_EQ(queueId, semaphore->getSemaphoreDataDword());
        EXPECT_EQ(syncAllocGpuVa + sizeof(uint32_t), semaphore->getSemaphoreGraphicsAddress());
        EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphore->getCompareOperation());

        // End section
        miPredicate = reinterpret_cast<MI_SET_PREDICATE *>(++semaphore);
        if (!RelaxedOrderingCommandsHelper::verifyMiPredicate<FamilyType>(miPredicate, MiPredicateType::disable)) {
            return false;
        }

        // Jump to end from Primary Tile section - patching: taken when the token's
        // queue-id dword already equals this queue's id.
        if (!RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(jumpToEndSectionFromPrimaryTile, castToUint64(miPredicate), syncAllocGpuVa + sizeof(uint32_t), queueId, NEO::CompareOperation::equal, false, false)) {
            return false;
        }

        return true;
    };

    // first run without dependency
    immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_TRUE(verifyTokenAcquisition(false));

    // second run: prior append creates in-order dependencies, so per-partition
    // dependency semaphores precede the init sequence.
    offset = cmdStream->getUsed();
    immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_TRUE(verifyTokenAcquisition(true));
}
|
||||
|
||||
} // namespace ult
|
||||
|
|
Loading…
Reference in New Issue