mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-25 13:33:02 +08:00
feature: handle limited sync dispatch initialization
Related-To: NEO-8171 Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
3560e8a735
commit
6c7a568f64
@@ -201,6 +201,8 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, CommandToPatchContainer *outWaitCmds,
|
||||
bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency, bool skipAddingWaitEventsToResidency);
|
||||
|
||||
void appendSynchronizedDispatchInitializationSection();
|
||||
|
||||
ze_result_t reserveSpace(size_t size, void **ptr) override;
|
||||
ze_result_t reset() override;
|
||||
ze_result_t executeCommandListImmediate(bool performMigration) override;
|
||||
|
||||
@@ -376,6 +376,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
|
||||
return ret;
|
||||
}
|
||||
|
||||
appendSynchronizedDispatchInitializationSection();
|
||||
|
||||
Event *event = nullptr;
|
||||
if (hEvent) {
|
||||
event = Event::fromHandle(hEvent);
|
||||
@@ -419,6 +421,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(
|
||||
return ret;
|
||||
}
|
||||
|
||||
appendSynchronizedDispatchInitializationSection();
|
||||
|
||||
Event *event = nullptr;
|
||||
if (hSignalEvent) {
|
||||
event = Event::fromHandle(hSignalEvent);
|
||||
@@ -453,6 +457,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
|
||||
return ret;
|
||||
}
|
||||
|
||||
appendSynchronizedDispatchInitializationSection();
|
||||
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
Event *event = nullptr;
|
||||
if (hEvent) {
|
||||
@@ -496,6 +502,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
|
||||
return ret;
|
||||
}
|
||||
|
||||
appendSynchronizedDispatchInitializationSection();
|
||||
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
launchParams.isIndirect = true;
|
||||
launchParams.isPredicate = true;
|
||||
@@ -556,6 +564,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
|
||||
handleInOrderImplicitDependencies(isRelaxedOrderingDispatchAllowed(0));
|
||||
}
|
||||
|
||||
appendSynchronizedDispatchInitializationSection();
|
||||
|
||||
event->resetPackets(false);
|
||||
event->disableHostCaching(!isImmediateType());
|
||||
commandContainer.addToResidencyContainer(event->getPoolAllocation(this->device));
|
||||
@@ -601,6 +611,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
|
||||
return ret;
|
||||
}
|
||||
|
||||
appendSynchronizedDispatchInitializationSection();
|
||||
|
||||
Event *signalEvent = nullptr;
|
||||
if (hSignalEvent) {
|
||||
signalEvent = Event::fromHandle(hSignalEvent);
|
||||
@@ -1483,6 +1495,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
return ret;
|
||||
}
|
||||
|
||||
appendSynchronizedDispatchInitializationSection();
|
||||
|
||||
bool dcFlush = false;
|
||||
Event *signalEvent = nullptr;
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
@@ -1922,6 +1936,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
return res;
|
||||
}
|
||||
|
||||
appendSynchronizedDispatchInitializationSection();
|
||||
|
||||
if (!handleCounterBasedEventOperations(signalEvent)) {
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
@@ -2803,6 +2819,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
|
||||
return ret;
|
||||
}
|
||||
|
||||
appendSynchronizedDispatchInitializationSection();
|
||||
|
||||
Event *signalEvent = nullptr;
|
||||
if (hSignalEvent) {
|
||||
signalEvent = Event::fromHandle(hSignalEvent);
|
||||
@@ -3349,6 +3367,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
|
||||
return ret;
|
||||
}
|
||||
|
||||
appendSynchronizedDispatchInitializationSection();
|
||||
|
||||
Event *signalEvent = nullptr;
|
||||
if (hSignalEvent) {
|
||||
signalEvent = Event::fromHandle(hSignalEvent);
|
||||
@@ -3912,4 +3932,19 @@ inline bool CommandListCoreFamily<gfxCoreFamily>::isCbEventBoundToCmdList(Event
|
||||
return event->isCounterBased() && event->getInOrderExecInfo().get() == inOrderExecInfo.get();
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendSynchronizedDispatchInitializationSection() {
|
||||
auto syncAlloc = device->getSyncDispatchTokenAllocation();
|
||||
|
||||
if (this->synchronizedDispatchMode != NEO::SynchronizedDispatchMode::disabled) {
|
||||
commandContainer.addToResidencyContainer(syncAlloc);
|
||||
}
|
||||
|
||||
if (this->synchronizedDispatchMode == NEO::SynchronizedDispatchMode::limited) {
|
||||
NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(), syncAlloc->getGpuAddress() + sizeof(uint32_t), 0u,
|
||||
GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD,
|
||||
false, false, false, true, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -151,6 +151,7 @@ struct Device : _ze_device_handle_t {
|
||||
virtual uint32_t getEventMaxKernelCount() const = 0;
|
||||
NEO::TagAllocatorBase *getDeviceInOrderCounterAllocator();
|
||||
NEO::TagAllocatorBase *getHostInOrderCounterAllocator();
|
||||
NEO::GraphicsAllocation *getSyncDispatchTokenAllocation() const { return syncDispatchTokenAllocation; }
|
||||
uint32_t getNextSyncDispatchQueueId();
|
||||
void ensureSyncDispatchTokenAllocation();
|
||||
|
||||
|
||||
@@ -31,6 +31,7 @@ struct InOrderCmdListFixture : public ::Test<ModuleFixture> {
|
||||
using EventImp<uint32_t>::inOrderAllocationOffset;
|
||||
using EventImp<uint32_t>::csrs;
|
||||
using EventImp<uint32_t>::signalScope;
|
||||
using EventImp<uint32_t>::unsetCmdQueue;
|
||||
|
||||
void makeCounterBasedInitiallyDisabled() {
|
||||
counterBasedMode = CounterBasedMode::initiallyDisabled;
|
||||
|
||||
@@ -5869,7 +5869,13 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeAndNoopWaitEventsAllowedWhenEvent
|
||||
}
|
||||
|
||||
using SynchronizedDispatchTests = InOrderCmdListFixture;
|
||||
using MultiTileSynchronizedDispatchTests = MultiTileInOrderCmdListTests;
|
||||
|
||||
struct MultiTileSynchronizedDispatchTests : public MultiTileInOrderCmdListTests {
|
||||
void SetUp() override {
|
||||
NEO::debugManager.flags.ForceSynchronizedDispatchMode.set(1);
|
||||
MultiTileInOrderCmdListTests::SetUp();
|
||||
}
|
||||
};
|
||||
|
||||
HWTEST2_F(SynchronizedDispatchTests, givenSingleTileSyncDispatchQueueWhenCreatingThenDontAssignQueueId, IsAtLeastSkl) {
|
||||
NEO::debugManager.flags.ForceSynchronizedDispatchMode.set(1);
|
||||
@@ -5893,6 +5899,8 @@ HWTEST2_F(SynchronizedDispatchTests, givenSingleTileSyncDispatchQueueWhenCreatin
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileSynchronizedDispatchTests, givenDebugFlagSetWhenCreatingCmdListThenEnableSynchronizedDispatch, IsAtLeastSkl) {
|
||||
NEO::debugManager.flags.ForceSynchronizedDispatchMode.set(-1);
|
||||
|
||||
auto immCmdList = createMultiTileImmCmdList<gfxCoreFamily>();
|
||||
auto regularCmdList = createMultiTileRegularCmdList<gfxCoreFamily>(false);
|
||||
|
||||
@@ -5917,8 +5925,6 @@ HWTEST2_F(MultiTileSynchronizedDispatchTests, givenDebugFlagSetWhenCreatingCmdLi
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileSynchronizedDispatchTests, givenMultiTileSyncDispatchQueueWhenCreatingThenAssignQueueId, IsAtLeastSkl) {
|
||||
NEO::debugManager.flags.ForceSynchronizedDispatchMode.set(1);
|
||||
|
||||
auto regularCmdList0 = createMultiTileRegularCmdList<gfxCoreFamily>(false);
|
||||
auto regularCmdList1 = createMultiTileRegularCmdList<gfxCoreFamily>(false);
|
||||
auto immCmdList0 = createMultiTileImmCmdList<gfxCoreFamily>();
|
||||
@@ -5938,8 +5944,6 @@ HWTEST2_F(MultiTileSynchronizedDispatchTests, givenMultiTileSyncDispatchQueueWhe
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileSynchronizedDispatchTests, givenSyncDispatchEnabledWhenAllocatingQueueIdThenEnsureTokenAllocation, IsAtLeastSkl) {
|
||||
NEO::debugManager.flags.ForceSynchronizedDispatchMode.set(1);
|
||||
|
||||
auto mockDevice = static_cast<MockDeviceImp *>(device);
|
||||
|
||||
EXPECT_EQ(nullptr, mockDevice->syncDispatchTokenAllocation);
|
||||
@@ -5958,5 +5962,169 @@ HWTEST2_F(MultiTileSynchronizedDispatchTests, givenSyncDispatchEnabledWhenAlloca
|
||||
EXPECT_EQ(mockDevice->syncDispatchTokenAllocation, syncAllocation);
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileSynchronizedDispatchTests, givenSyncDispatchWhenAppendingThenHandleResidency, IsAtLeastSkl) {
|
||||
auto immCmdList = createMultiTileImmCmdList<gfxCoreFamily>();
|
||||
|
||||
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
|
||||
ultCsr->storeMakeResidentAllocations = true;
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
EXPECT_EQ(1u, ultCsr->makeResidentAllocations[device->getSyncDispatchTokenAllocation()]);
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
EXPECT_EQ(2u, ultCsr->makeResidentAllocations[device->getSyncDispatchTokenAllocation()]);
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileSynchronizedDispatchTests, givenLimitedSyncDispatchWhenAppendingThenProgramTokenCheck, IsAtLeastSkl) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using COMPARE_OPERATION = typename MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
|
||||
void *alloc = nullptr;
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
||||
auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &alloc);
|
||||
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
|
||||
auto immCmdList = createMultiTileImmCmdList<gfxCoreFamily>();
|
||||
immCmdList->synchronizedDispatchMode = NEO::SynchronizedDispatchMode::limited;
|
||||
|
||||
auto eventPool = createEvents<FamilyType>(1, false);
|
||||
events[0]->makeCounterBasedInitiallyDisabled();
|
||||
|
||||
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
|
||||
size_t offset = cmdStream->getUsed();
|
||||
|
||||
auto verifyTokenCheck = [&](uint32_t numDependencies) {
|
||||
GenCmdList cmdList;
|
||||
EXPECT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), offset), (cmdStream->getUsed() - offset)));
|
||||
if (::testing::Test::HasFailure()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto semaphore = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), semaphore);
|
||||
if (::testing::Test::HasFailure()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < numDependencies; i++) {
|
||||
for (uint32_t j = 1; j < partitionCount; j++) {
|
||||
semaphore++;
|
||||
semaphore = find<MI_SEMAPHORE_WAIT *>(semaphore, cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), semaphore);
|
||||
}
|
||||
semaphore++;
|
||||
}
|
||||
|
||||
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphore);
|
||||
EXPECT_NE(nullptr, semaphoreCmd);
|
||||
if (::testing::Test::HasFailure()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
EXPECT_EQ(0u, semaphoreCmd->getSemaphoreDataDword());
|
||||
EXPECT_EQ(device->getSyncDispatchTokenAllocation()->getGpuAddress() + sizeof(uint32_t), semaphoreCmd->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation());
|
||||
|
||||
return !::testing::Test::HasFailure();
|
||||
};
|
||||
|
||||
// first run without dependency
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
EXPECT_TRUE(verifyTokenCheck(0));
|
||||
|
||||
offset = cmdStream->getUsed();
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
EXPECT_TRUE(verifyTokenCheck(1));
|
||||
|
||||
offset = cmdStream->getUsed();
|
||||
immCmdList->appendLaunchCooperativeKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, false);
|
||||
EXPECT_TRUE(verifyTokenCheck(1));
|
||||
|
||||
offset = cmdStream->getUsed();
|
||||
immCmdList->appendLaunchKernelIndirect(kernel->toHandle(), *static_cast<ze_group_count_t *>(alloc), nullptr, 0, nullptr, false);
|
||||
EXPECT_TRUE(verifyTokenCheck(1));
|
||||
|
||||
offset = cmdStream->getUsed();
|
||||
const ze_kernel_handle_t launchKernels = kernel->toHandle();
|
||||
immCmdList->appendLaunchMultipleKernelsIndirect(1, &launchKernels, reinterpret_cast<const uint32_t *>(alloc), &groupCount, nullptr, 0, nullptr, false);
|
||||
EXPECT_TRUE(verifyTokenCheck(1));
|
||||
|
||||
offset = cmdStream->getUsed();
|
||||
immCmdList->appendEventReset(events[0]->toHandle());
|
||||
EXPECT_TRUE(verifyTokenCheck(1));
|
||||
|
||||
offset = cmdStream->getUsed();
|
||||
size_t rangeSizes = 1;
|
||||
const void **ranges = const_cast<const void **>(&alloc);
|
||||
immCmdList->appendMemoryRangesBarrier(1, &rangeSizes, ranges, nullptr, 0, nullptr);
|
||||
|
||||
offset = cmdStream->getUsed();
|
||||
immCmdList->appendMemoryCopy(alloc, alloc, 1, nullptr, 0, nullptr, false, false);
|
||||
EXPECT_TRUE(verifyTokenCheck(1));
|
||||
|
||||
offset = cmdStream->getUsed();
|
||||
ze_copy_region_t region = {0, 0, 0, 1, 1, 1};
|
||||
immCmdList->appendMemoryCopyRegion(alloc, ®ion, 1, 1, alloc, ®ion, 1, 1, nullptr, 0, nullptr, false, false);
|
||||
EXPECT_TRUE(verifyTokenCheck(1));
|
||||
|
||||
offset = cmdStream->getUsed();
|
||||
immCmdList->appendMemoryFill(alloc, alloc, 1, 1, nullptr, 0, nullptr, false);
|
||||
EXPECT_TRUE(verifyTokenCheck(1));
|
||||
|
||||
offset = cmdStream->getUsed();
|
||||
immCmdList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(alloc), nullptr, 0, nullptr);
|
||||
EXPECT_TRUE(verifyTokenCheck(1));
|
||||
|
||||
offset = cmdStream->getUsed();
|
||||
auto handle = events[0]->toHandle();
|
||||
events[0]->unsetCmdQueue();
|
||||
immCmdList->appendBarrier(nullptr, 1, &handle, false);
|
||||
EXPECT_TRUE(verifyTokenCheck(2));
|
||||
|
||||
context->freeMem(alloc);
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileSynchronizedDispatchTests, givenFullSyncDispatchWhenAppendingThenDontProgramTokenCheck, IsAtLeastSkl) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
|
||||
auto immCmdList = createMultiTileImmCmdList<gfxCoreFamily>();
|
||||
immCmdList->synchronizedDispatchMode = NEO::SynchronizedDispatchMode::full;
|
||||
|
||||
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
|
||||
size_t offset = cmdStream->getUsed();
|
||||
|
||||
auto verifyTokenCheck = [&](bool hasDependencySemaphore) {
|
||||
GenCmdList cmdList;
|
||||
EXPECT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), offset), (cmdStream->getUsed() - offset)));
|
||||
if (::testing::Test::HasFailure()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
|
||||
for (auto &semaphore : semaphores) {
|
||||
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphore);
|
||||
EXPECT_NE(nullptr, semaphoreCmd);
|
||||
if (::testing::Test::HasFailure()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
EXPECT_NE(device->getSyncDispatchTokenAllocation()->getGpuAddress() + sizeof(uint32_t), semaphoreCmd->getSemaphoreGraphicsAddress());
|
||||
if (::testing::Test::HasFailure()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
// first run without dependency
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
EXPECT_TRUE(verifyTokenCheck(false));
|
||||
|
||||
offset = cmdStream->getUsed();
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
EXPECT_TRUE(verifyTokenCheck(true));
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
Reference in New Issue
Block a user