feature: in-order CommandList host sync support

Related-To: NEO-7966

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-05-18 13:02:45 +00:00
committed by Compute-Runtime-Automation
parent ae7cff3c3e
commit 50931b132b
3 changed files with 89 additions and 26 deletions

View File

@@ -177,7 +177,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
using BaseClass::inOrderDependencyCounterAllocation;
void printKernelsPrintfOutput(bool hangDetected);
ze_result_t synchronizeInOrderExecution() const;
ze_result_t synchronizeInOrderExecution(uint64_t timeout) const;
MOCKABLE_VIRTUAL void checkAssert();
std::atomic<bool> dependenciesPresent{false};

View File

@@ -690,22 +690,28 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchCooperati
// NOTE(review): this listing is a diff hunk rendered without +/- markers, so
// removed and added lines of hostSynchronize appear interleaved. Comments below
// mark which lines appear to belong to which side — confirm against the commit.
//
// Waits on the host for this immediate command list to complete.
// `timeout` is divided by 1000 to obtain microseconds for the CSR wait and is
// compared against a nanosecond duration in synchronizeInOrderExecution, so it
// is presumably expressed in nanoseconds.
// Returns `status`: ZE_RESULT_SUCCESS by default, ZE_RESULT_ERROR_DEVICE_LOST
// when the CSR wait reports a GPU hang, or whatever
// synchronizeInOrderExecution(timeout) returns on the in-order path.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint64_t timeout) {
// Captured once up front; used as the wait target and as the threshold passed
// to cleanAllocationList.
auto syncTaskCount = this->csr->peekTaskCount();
ze_result_t status = ZE_RESULT_SUCCESS;
// The next line appears to be the removed (pre-change) condition; the
// if / else-if pair that follows appears to be its replacement.
if (this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) {
// In-order mode delegates to the counter-based wait and keeps its status.
if (isInOrderExecutionEnabled()) {
status = synchronizeInOrderExecution(timeout);
} else if (this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) {
const int64_t timeoutInMicroSeconds = timeout / 1000;
// Appears to be the removed (pre-change) position of the peekTaskCount call,
// which duplicates the declaration at the top of the function.
auto syncTaskCount = this->csr->peekTaskCount();
const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutInMicroSeconds},
syncTaskCount);
if (waitStatus == NEO::WaitStatus::GpuHang) {
// The three lines below appear to be the removed early-return path; the
// `status = ...` assignment after them appears to be the added replacement,
// which defers printf output and the assert check to the common tail.
this->printKernelsPrintfOutput(true);
this->checkAssert();
return ZE_RESULT_ERROR_DEVICE_LOST;
status = ZE_RESULT_ERROR_DEVICE_LOST;
}
// The cleanup / printf / assert lines below appear to be the removed
// per-branch tail, superseded by the common tail at the end of the function.
this->csr->getInternalAllocationStorage()->cleanAllocationList(syncTaskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION);
this->printKernelsPrintfOutput(false);
this->checkAssert();
}
// Appears to be the removed unconditional success return.
return ZE_RESULT_SUCCESS;
// Common tail (appears to be added): temporary allocations are cleaned only
// when the wait succeeded; printf output and the assert check always run.
if (status == ZE_RESULT_SUCCESS) {
this->csr->getInternalAllocationStorage()->cleanAllocationList(syncTaskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION);
}
// The argument is true only for the device-lost status.
this->printKernelsPrintfOutput(status == ZE_RESULT_ERROR_DEVICE_LOST);
this->checkAssert();
return status;
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -852,7 +858,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(cons
}
if (isInOrderExecutionEnabled()) {
auto status = synchronizeInOrderExecution();
auto status = synchronizeInOrderExecution(std::numeric_limits<uint64_t>::max());
if (status != ZE_RESULT_SUCCESS) {
return status;
}
@@ -1061,23 +1067,42 @@ bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isRelaxedOrderingDispatchAll
}
// NOTE(review): this listing is a diff hunk rendered without +/- markers, so
// removed and added lines of synchronizeInOrderExecution appear interleaved.
// Comments below mark which lines appear to belong to which side — confirm
// against the commit.
//
// Polls the in-order dependency counter through a host pointer until it is
// greater than or equal to this->inOrderDependencyCounter, a GPU hang is
// detected, or `timeout` elapses.
// `timeout` is compared against an elapsed time measured in nanoseconds.
//   - std::numeric_limits<uint64_t>::max(): the elapsed-time update is skipped,
//     so the loop repeats until the counter is reached or a hang is detected.
//   - 0: the loop body runs once.
// Returns ZE_RESULT_SUCCESS when the counter is reached,
// ZE_RESULT_ERROR_DEVICE_LOST when checkGpuHangDetected reports a hang, and
// ZE_RESULT_NOT_READY otherwise.
template <GFXCORE_FAMILY gfxCoreFamily>
// The parameterless signature appears to be the removed one; the signature
// taking `timeout` appears to be its replacement.
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::synchronizeInOrderExecution() const {
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::synchronizeInOrderExecution(uint64_t timeout) const {
std::chrono::high_resolution_clock::time_point waitStartTime, lastHangCheckTime, now;
// Elapsed nanoseconds since waitStartTime; stays 0 until first updated.
uint64_t timeDiff = 0;
// Result if the loop ends without reaching the counter or detecting a hang.
ze_result_t status = ZE_RESULT_NOT_READY;
auto hostAddress = static_cast<uint32_t *>(this->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
auto waitValue = this->inOrderDependencyCounter;
// The `auto` declaration below appears to be removed; the plain assignment
// after it appears to be the replacement now that the variable is declared
// above.
auto lastHangCheckTime = std::chrono::high_resolution_clock::now();
lastHangCheckTime = std::chrono::high_resolution_clock::now();
waitStartTime = lastHangCheckTime;
// The `while` header appears to be the removed loop form; the `do` appears to
// be the added one, paired with the `while (timeDiff < timeout)` at the end.
while (*hostAddress < waitValue) {
do {
// Called on the counter allocation before each check of the host pointer.
this->csr->downloadAllocation(*this->inOrderDependencyCounterAllocation);
// The next three lines appear to be the removed body: a bool result and an
// immediate device-lost return on a detected hang.
bool status = NEO::WaitUtils::waitFunctionWithPredicate<const uint32_t>(hostAddress, waitValue, std::greater_equal<uint32_t>());
if (!status && this->csr->checkGpuHangDetected(std::chrono::high_resolution_clock::now(), lastHangCheckTime)) {
return ZE_RESULT_ERROR_DEVICE_LOST;
// Appears to be added: a true predicate result records success and leaves the
// loop.
if (NEO::WaitUtils::waitFunctionWithPredicate<const uint32_t>(hostAddress, waitValue, std::greater_equal<uint32_t>())) {
status = ZE_RESULT_SUCCESS;
break;
}
}
// Appears to be the removed unconditional success return.
return ZE_RESULT_SUCCESS;
if (this->csr->checkGpuHangDetected(std::chrono::high_resolution_clock::now(), lastHangCheckTime)) {
status = ZE_RESULT_ERROR_DEVICE_LOST;
break;
}
// `continue` in a do-while jumps to the loop condition; timeDiff is never
// updated on this path, so `timeDiff < timeout` stays true for the max value.
if (timeout == std::numeric_limits<uint64_t>::max()) {
continue;
} else if (timeout == 0) {
// Zero timeout: leave after this single check with the current status.
break;
}
now = std::chrono::high_resolution_clock::now();
timeDiff = std::chrono::duration_cast<std::chrono::nanoseconds>(now - waitStartTime).count();
} while (timeDiff < timeout);
return status;
}
} // namespace L0

View File

@@ -1127,19 +1127,57 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
const uint32_t failCounter = 3;
uint32_t callCounter = 0;
bool forceFail = false;
ultCsr->downloadAllocationImpl = [&](GraphicsAllocation &graphicsAllocation) {
callCounter++;
if (callCounter >= failCounter) {
*hostAddress = 1;
if (callCounter >= failCounter && !forceFail) {
(*hostAddress)++;
}
};
immCmdList->synchronizeInOrderExecution();
// single check - not ready
{
EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(0));
EXPECT_EQ(3u, callCounter);
EXPECT_EQ(2u, ultCsr->checkGpuHangDetectedCalled);
EXPECT_EQ(1u, *hostAddress);
EXPECT_EQ(1u, callCounter);
EXPECT_EQ(1u, ultCsr->checkGpuHangDetectedCalled);
EXPECT_EQ(0u, *hostAddress);
}
// timeout - not ready
{
forceFail = true;
EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(10));
EXPECT_TRUE(callCounter > 1);
EXPECT_TRUE(ultCsr->checkGpuHangDetectedCalled > 1);
EXPECT_EQ(0u, *hostAddress);
}
// gpu hang
{
ultCsr->forceReturnGpuHang = true;
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, immCmdList->hostSynchronize(10));
EXPECT_TRUE(callCounter > 1);
EXPECT_TRUE(ultCsr->checkGpuHangDetectedCalled > 1);
EXPECT_EQ(0u, *hostAddress);
}
// success
{
ultCsr->checkGpuHangDetectedCalled = 0;
ultCsr->forceReturnGpuHang = false;
forceFail = false;
callCounter = 0;
EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->hostSynchronize(std::numeric_limits<uint64_t>::max()));
EXPECT_EQ(failCounter, callCounter);
EXPECT_EQ(failCounter - 1, ultCsr->checkGpuHangDetectedCalled);
EXPECT_EQ(1u, *hostAddress);
}
}
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDoingCpuCopyThenSynchronize, IsAtLeastXeHpCore) {