mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 14:55:24 +08:00
feature: flush task count on cmd list hostSynchronize if needed
Related-To: NEO-7966
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by Compute-Runtime-Automation
parent 33d6e775aa
commit 7e6e0da978
@@ -184,7 +184,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
using BaseClass::inOrderDependencyCounterAllocation;
|
||||
|
||||
void printKernelsPrintfOutput(bool hangDetected);
|
||||
ze_result_t synchronizeInOrderExecution(uint64_t timeout) const;
|
||||
MOCKABLE_VIRTUAL ze_result_t synchronizeInOrderExecution(uint64_t timeout) const;
|
||||
ze_result_t hostSynchronize(uint64_t timeout, TaskCountType taskCount, bool handlePostWaitOperations);
|
||||
bool hasStallingCmdsForRelaxedOrdering(uint32_t numWaitEvents, bool relaxedOrderingDispatch) const;
|
||||
void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) override;
|
||||
@@ -195,6 +195,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
MOCKABLE_VIRTUAL void checkAssert();
|
||||
ComputeFlushMethodType computeFlushMethod = nullptr;
|
||||
std::atomic<bool> dependenciesPresent{false};
|
||||
bool latestFlushIsHostVisible = false;
|
||||
};
|
||||
|
||||
template <PRODUCT_FAMILY gfxProductFamily>
|
||||
|
||||
@@ -851,7 +851,9 @@ template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint64_t timeout, TaskCountType taskCount, bool handlePostWaitOperations) {
|
||||
ze_result_t status = ZE_RESULT_SUCCESS;
|
||||
|
||||
if (isInOrderExecutionEnabled() && NEO::DebugManager.flags.UseCounterAllocToSyncInOrderCmdList.get() != 0) {
|
||||
bool inOrderWaitAllowed = (isInOrderExecutionEnabled() && !handlePostWaitOperations && this->latestFlushIsHostVisible);
|
||||
|
||||
if (inOrderWaitAllowed) {
|
||||
status = synchronizeInOrderExecution(timeout);
|
||||
} else {
|
||||
const int64_t timeoutInMicroSeconds = timeout / 1000;
|
||||
@@ -860,10 +862,12 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint6
|
||||
taskCount);
|
||||
if (waitStatus == NEO::WaitStatus::GpuHang) {
|
||||
status = ZE_RESULT_ERROR_DEVICE_LOST;
|
||||
} else if (waitStatus == NEO::WaitStatus::NotReady) {
|
||||
status = ZE_RESULT_NOT_READY;
|
||||
}
|
||||
}
|
||||
|
||||
if (handlePostWaitOperations) {
|
||||
if (handlePostWaitOperations && status != ZE_RESULT_NOT_READY) {
|
||||
if (status == ZE_RESULT_SUCCESS) {
|
||||
this->cmdQImmediate->unregisterCsrClient();
|
||||
this->csr->getInternalAllocationStorage()->cleanAllocationList(taskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION);
|
||||
@@ -903,10 +907,13 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_res
|
||||
|
||||
if (signalEvent) {
|
||||
signalEvent->setCsr(this->csr, isInOrderExecutionEnabled());
|
||||
this->latestFlushIsHostVisible = signalEvent->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST);
|
||||
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
signalEvent->enableInOrderExecMode(*this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset);
|
||||
}
|
||||
} else {
|
||||
this->latestFlushIsHostVisible = false;
|
||||
}
|
||||
|
||||
return inputRet;
|
||||
|
||||
@@ -165,6 +165,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
|
||||
using BaseClass::frontEndStateTracking;
|
||||
using BaseClass::getDcFlushRequired;
|
||||
using BaseClass::getHostPtrAlloc;
|
||||
using BaseClass::hostSynchronize;
|
||||
using BaseClass::immediateCmdListHeapSharing;
|
||||
using BaseClass::inOrderDependencyCounter;
|
||||
using BaseClass::inOrderDependencyCounterAllocation;
|
||||
@@ -172,6 +173,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
|
||||
using BaseClass::isFlushTaskSubmissionEnabled;
|
||||
using BaseClass::isSyncModeQueue;
|
||||
using BaseClass::isTbxMode;
|
||||
using BaseClass::latestFlushIsHostVisible;
|
||||
using BaseClass::partitionCount;
|
||||
using BaseClass::pipeControlMultiKernelEventSync;
|
||||
using BaseClass::pipelineSelectStateTracking;
|
||||
@@ -183,6 +185,9 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
|
||||
using BaseClass::synchronizeInOrderExecution;
|
||||
|
||||
WhiteBox() : BaseClass(BaseClass::defaultNumIddsPerBlock) {}
|
||||
|
||||
ADDMETHOD_CONST(synchronizeInOrderExecution, ze_result_t, true, ZE_RESULT_SUCCESS,
|
||||
(uint64_t timeout), (timeout));
|
||||
};
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
||||
@@ -665,6 +665,7 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
|
||||
using EventImp<uint32_t>::inOrderExecSignalValue;
|
||||
using EventImp<uint32_t>::inOrderAllocationOffset;
|
||||
using EventImp<uint32_t>::csrs;
|
||||
using EventImp<uint32_t>::signalScope;
|
||||
};
|
||||
|
||||
void SetUp() override {
|
||||
@@ -694,6 +695,7 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
|
||||
}
|
||||
|
||||
ze_event_desc_t eventDesc = {};
|
||||
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
|
||||
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
|
||||
|
||||
@@ -807,8 +809,6 @@ HWTEST2_F(InOrderCmdListTests, givenQueueFlagWhenCreatingCmdListThenEnableRelaxe
|
||||
}
|
||||
|
||||
HWTEST2_F(InOrderCmdListTests, givenCmdListsWhenDispatchingThenUseInternalTaskCountForWaits, IsAtLeastSkl) {
|
||||
DebugManager.flags.UseCounterAllocToSyncInOrderCmdList.set(0);
|
||||
|
||||
auto immCmdList0 = createImmCmdList<gfxCoreFamily>();
|
||||
auto immCmdList1 = createImmCmdList<gfxCoreFamily>();
|
||||
|
||||
@@ -1413,6 +1413,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingTimestampEventThen
|
||||
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
|
||||
|
||||
auto eventPool = createEvents<FamilyType>(1, true);
|
||||
events[0]->signalScope = 0;
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
|
||||
|
||||
@@ -1456,6 +1457,40 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingTimestampEventThen
|
||||
EXPECT_EQ(1u, sdiCmd->getDataDword0());
|
||||
}
|
||||
|
||||
HWTEST2_F(InOrderCmdListTests, givenHostVisibleEventOnLatestFlushWhenCallingSynchronizeThenUseInOrderSync, IsAtLeastSkl) {
|
||||
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
|
||||
|
||||
auto immCmdList = createImmCmdList<gfxCoreFamily>();
|
||||
|
||||
auto eventPool = createEvents<FamilyType>(1, true);
|
||||
events[0]->signalScope = 0;
|
||||
|
||||
EXPECT_FALSE(immCmdList->latestFlushIsHostVisible);
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
|
||||
EXPECT_FALSE(immCmdList->latestFlushIsHostVisible);
|
||||
|
||||
EXPECT_EQ(0u, immCmdList->synchronizeInOrderExecutionCalled);
|
||||
EXPECT_EQ(0u, ultCsr->waitForCompletionWithTimeoutTaskCountCalled);
|
||||
|
||||
immCmdList->hostSynchronize(0, 1, false);
|
||||
EXPECT_EQ(0u, immCmdList->synchronizeInOrderExecutionCalled);
|
||||
EXPECT_EQ(1u, ultCsr->waitForCompletionWithTimeoutTaskCountCalled);
|
||||
|
||||
events[0]->signalScope = ZE_EVENT_SCOPE_FLAG_HOST;
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
|
||||
EXPECT_TRUE(immCmdList->latestFlushIsHostVisible);
|
||||
|
||||
immCmdList->hostSynchronize(0, 1, false);
|
||||
EXPECT_EQ(1u, immCmdList->synchronizeInOrderExecutionCalled);
|
||||
EXPECT_EQ(1u, ultCsr->waitForCompletionWithTimeoutTaskCountCalled);
|
||||
|
||||
// handle post sync operations
|
||||
immCmdList->hostSynchronize(0, 1, true);
|
||||
EXPECT_EQ(1u, immCmdList->synchronizeInOrderExecutionCalled);
|
||||
EXPECT_EQ(2u, ultCsr->waitForCompletionWithTimeoutTaskCountCalled);
|
||||
}
|
||||
|
||||
using NonPostSyncWalkerMatcher = IsWithinGfxCore<IGFX_GEN9_CORE, IGFX_GEN12LP_CORE>;
|
||||
|
||||
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenProgramPipeControlWithSignalAllocation, NonPostSyncWalkerMatcher) {
|
||||
@@ -2173,7 +2208,9 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
|
||||
|
||||
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
auto eventPool = createEvents<FamilyType>(1, false);
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
|
||||
|
||||
auto hostAddress = static_cast<uint32_t *>(ptrOffset(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), counterOffset));
|
||||
*hostAddress = 0;
|
||||
@@ -2191,8 +2228,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
|
||||
|
||||
// single check - not ready
|
||||
{
|
||||
EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(0));
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(0, ultCsr->taskCount, false));
|
||||
EXPECT_EQ(1u, callCounter);
|
||||
EXPECT_EQ(1u, ultCsr->checkGpuHangDetectedCalled);
|
||||
EXPECT_EQ(0u, *hostAddress);
|
||||
@@ -2201,8 +2237,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
|
||||
// timeout - not ready
|
||||
{
|
||||
forceFail = true;
|
||||
EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(10));
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(10, ultCsr->taskCount, false));
|
||||
EXPECT_TRUE(callCounter > 1);
|
||||
EXPECT_TRUE(ultCsr->checkGpuHangDetectedCalled > 1);
|
||||
EXPECT_EQ(0u, *hostAddress);
|
||||
@@ -2212,7 +2247,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
|
||||
{
|
||||
ultCsr->forceReturnGpuHang = true;
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, immCmdList->hostSynchronize(10));
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, immCmdList->hostSynchronize(10, ultCsr->taskCount, false));
|
||||
|
||||
EXPECT_TRUE(callCounter > 1);
|
||||
EXPECT_TRUE(ultCsr->checkGpuHangDetectedCalled > 1);
|
||||
@@ -2225,12 +2260,21 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
|
||||
ultCsr->forceReturnGpuHang = false;
|
||||
forceFail = false;
|
||||
callCounter = 0;
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->hostSynchronize(std::numeric_limits<uint64_t>::max()));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->hostSynchronize(std::numeric_limits<uint64_t>::max(), ultCsr->taskCount, false));
|
||||
|
||||
EXPECT_EQ(failCounter, callCounter);
|
||||
EXPECT_EQ(failCounter - 1, ultCsr->checkGpuHangDetectedCalled);
|
||||
EXPECT_EQ(1u, *hostAddress);
|
||||
}
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
*ultCsr->getTagAddress() = ultCsr->taskCount - 1;
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(0, ultCsr->taskCount, true));
|
||||
|
||||
*ultCsr->getTagAddress() = ultCsr->taskCount + 1;
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->hostSynchronize(0, ultCsr->taskCount, true));
|
||||
}
|
||||
|
||||
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDoingCpuCopyThenSynchronize, IsAtLeastXeHpCore) {
|
||||
@@ -2283,12 +2327,14 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenGpuHangDetectedInCpuCopyPathT
|
||||
auto immCmdList = createImmCmdList<gfxCoreFamily>();
|
||||
immCmdList->copyThroughLockedPtrEnabled = true;
|
||||
|
||||
auto eventPool = createEvents<FamilyType>(1, false);
|
||||
|
||||
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
|
||||
|
||||
auto hostAddress = static_cast<uint32_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
|
||||
*hostAddress = 0;
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
|
||||
|
||||
void *deviceAlloc = nullptr;
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
||||
@@ -2476,6 +2522,8 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenSignalingSy
|
||||
HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenCallingSyncThenHandleCompletion, IsAtLeastXeHpCore) {
|
||||
auto immCmdList = createMultiTileImmCmdList<gfxCoreFamily>();
|
||||
|
||||
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
|
||||
|
||||
auto eventPool = createEvents<FamilyType>(1, false);
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
|
||||
@@ -2485,26 +2533,26 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenCallingSync
|
||||
|
||||
*hostAddress0 = 0;
|
||||
*hostAddress1 = 0;
|
||||
EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(0));
|
||||
EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(0, ultCsr->taskCount, false));
|
||||
EXPECT_EQ(ZE_RESULT_NOT_READY, events[0]->hostSynchronize(0));
|
||||
|
||||
*hostAddress0 = 1;
|
||||
EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(0));
|
||||
EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(0, ultCsr->taskCount, false));
|
||||
EXPECT_EQ(ZE_RESULT_NOT_READY, events[0]->hostSynchronize(0));
|
||||
|
||||
*hostAddress0 = 0;
|
||||
*hostAddress1 = 1;
|
||||
EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(0));
|
||||
EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(0, ultCsr->taskCount, false));
|
||||
EXPECT_EQ(ZE_RESULT_NOT_READY, events[0]->hostSynchronize(0));
|
||||
|
||||
*hostAddress0 = 1;
|
||||
*hostAddress1 = 1;
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->hostSynchronize(0));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->hostSynchronize(0, ultCsr->taskCount, false));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, events[0]->hostSynchronize(0));
|
||||
|
||||
*hostAddress0 = 3;
|
||||
*hostAddress1 = 3;
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->hostSynchronize(0));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->hostSynchronize(0, ultCsr->taskCount, false));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, events[0]->hostSynchronize(0));
|
||||
}
|
||||
|
||||
@@ -2518,6 +2566,7 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenProgramming
|
||||
|
||||
auto eventPool = createEvents<FamilyType>(1, true);
|
||||
auto eventHandle = events[0]->toHandle();
|
||||
events[0]->signalScope = 0;
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, eventHandle, 0, nullptr, launchParams, false);
|
||||
|
||||
@@ -2566,6 +2615,7 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenProgramming
|
||||
|
||||
auto eventPool = createEvents<FamilyType>(1, true);
|
||||
auto eventHandle = events[0]->toHandle();
|
||||
events[0]->signalScope = 0;
|
||||
|
||||
immCmdList->signalAllEventPackets = true;
|
||||
events[0]->maxPacketCount = 4;
|
||||
|
||||
Reference in New Issue
Block a user