mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-26 15:03:02 +08:00
feature: in-order CommandList host sync support
Related-To: NEO-7966
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by Compute-Runtime-Automation
parent ae7cff3c3e
commit 50931b132b
@@ -177,7 +177,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
using BaseClass::inOrderDependencyCounterAllocation;
|
||||
|
||||
void printKernelsPrintfOutput(bool hangDetected);
|
||||
ze_result_t synchronizeInOrderExecution() const;
|
||||
ze_result_t synchronizeInOrderExecution(uint64_t timeout) const;
|
||||
|
||||
MOCKABLE_VIRTUAL void checkAssert();
|
||||
std::atomic<bool> dependenciesPresent{false};
|
||||
|
||||
@@ -690,22 +690,28 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchCooperati
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint64_t timeout) {
|
||||
auto syncTaskCount = this->csr->peekTaskCount();
|
||||
ze_result_t status = ZE_RESULT_SUCCESS;
|
||||
|
||||
if (this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) {
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
status = synchronizeInOrderExecution(timeout);
|
||||
} else if (this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) {
|
||||
const int64_t timeoutInMicroSeconds = timeout / 1000;
|
||||
auto syncTaskCount = this->csr->peekTaskCount();
|
||||
const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutInMicroSeconds},
|
||||
syncTaskCount);
|
||||
if (waitStatus == NEO::WaitStatus::GpuHang) {
|
||||
this->printKernelsPrintfOutput(true);
|
||||
this->checkAssert();
|
||||
return ZE_RESULT_ERROR_DEVICE_LOST;
|
||||
status = ZE_RESULT_ERROR_DEVICE_LOST;
|
||||
}
|
||||
this->csr->getInternalAllocationStorage()->cleanAllocationList(syncTaskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION);
|
||||
this->printKernelsPrintfOutput(false);
|
||||
this->checkAssert();
|
||||
}
|
||||
return ZE_RESULT_SUCCESS;
|
||||
|
||||
if (status == ZE_RESULT_SUCCESS) {
|
||||
this->csr->getInternalAllocationStorage()->cleanAllocationList(syncTaskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION);
|
||||
}
|
||||
|
||||
this->printKernelsPrintfOutput(status == ZE_RESULT_ERROR_DEVICE_LOST);
|
||||
this->checkAssert();
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -852,7 +858,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(cons
|
||||
}
|
||||
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
auto status = synchronizeInOrderExecution();
|
||||
auto status = synchronizeInOrderExecution(std::numeric_limits<uint64_t>::max());
|
||||
if (status != ZE_RESULT_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
@@ -1061,23 +1067,42 @@ bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isRelaxedOrderingDispatchAll
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::synchronizeInOrderExecution() const {
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::synchronizeInOrderExecution(uint64_t timeout) const {
|
||||
std::chrono::high_resolution_clock::time_point waitStartTime, lastHangCheckTime, now;
|
||||
uint64_t timeDiff = 0;
|
||||
|
||||
ze_result_t status = ZE_RESULT_NOT_READY;
|
||||
|
||||
auto hostAddress = static_cast<uint32_t *>(this->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
|
||||
auto waitValue = this->inOrderDependencyCounter;
|
||||
|
||||
auto lastHangCheckTime = std::chrono::high_resolution_clock::now();
|
||||
lastHangCheckTime = std::chrono::high_resolution_clock::now();
|
||||
waitStartTime = lastHangCheckTime;
|
||||
|
||||
while (*hostAddress < waitValue) {
|
||||
do {
|
||||
this->csr->downloadAllocation(*this->inOrderDependencyCounterAllocation);
|
||||
|
||||
bool status = NEO::WaitUtils::waitFunctionWithPredicate<const uint32_t>(hostAddress, waitValue, std::greater_equal<uint32_t>());
|
||||
|
||||
if (!status && this->csr->checkGpuHangDetected(std::chrono::high_resolution_clock::now(), lastHangCheckTime)) {
|
||||
return ZE_RESULT_ERROR_DEVICE_LOST;
|
||||
if (NEO::WaitUtils::waitFunctionWithPredicate<const uint32_t>(hostAddress, waitValue, std::greater_equal<uint32_t>())) {
|
||||
status = ZE_RESULT_SUCCESS;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
if (this->csr->checkGpuHangDetected(std::chrono::high_resolution_clock::now(), lastHangCheckTime)) {
|
||||
status = ZE_RESULT_ERROR_DEVICE_LOST;
|
||||
break;
|
||||
}
|
||||
|
||||
if (timeout == std::numeric_limits<uint64_t>::max()) {
|
||||
continue;
|
||||
} else if (timeout == 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
now = std::chrono::high_resolution_clock::now();
|
||||
timeDiff = std::chrono::duration_cast<std::chrono::nanoseconds>(now - waitStartTime).count();
|
||||
} while (timeDiff < timeout);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -1127,19 +1127,57 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
|
||||
|
||||
const uint32_t failCounter = 3;
|
||||
uint32_t callCounter = 0;
|
||||
bool forceFail = false;
|
||||
|
||||
ultCsr->downloadAllocationImpl = [&](GraphicsAllocation &graphicsAllocation) {
|
||||
callCounter++;
|
||||
if (callCounter >= failCounter) {
|
||||
*hostAddress = 1;
|
||||
if (callCounter >= failCounter && !forceFail) {
|
||||
(*hostAddress)++;
|
||||
}
|
||||
};
|
||||
|
||||
immCmdList->synchronizeInOrderExecution();
|
||||
// single check - not ready
|
||||
{
|
||||
EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(0));
|
||||
|
||||
EXPECT_EQ(3u, callCounter);
|
||||
EXPECT_EQ(2u, ultCsr->checkGpuHangDetectedCalled);
|
||||
EXPECT_EQ(1u, *hostAddress);
|
||||
EXPECT_EQ(1u, callCounter);
|
||||
EXPECT_EQ(1u, ultCsr->checkGpuHangDetectedCalled);
|
||||
EXPECT_EQ(0u, *hostAddress);
|
||||
}
|
||||
|
||||
// timeout - not ready
|
||||
{
|
||||
forceFail = true;
|
||||
EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(10));
|
||||
|
||||
EXPECT_TRUE(callCounter > 1);
|
||||
EXPECT_TRUE(ultCsr->checkGpuHangDetectedCalled > 1);
|
||||
EXPECT_EQ(0u, *hostAddress);
|
||||
}
|
||||
|
||||
// gpu hang
|
||||
{
|
||||
ultCsr->forceReturnGpuHang = true;
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, immCmdList->hostSynchronize(10));
|
||||
|
||||
EXPECT_TRUE(callCounter > 1);
|
||||
EXPECT_TRUE(ultCsr->checkGpuHangDetectedCalled > 1);
|
||||
EXPECT_EQ(0u, *hostAddress);
|
||||
}
|
||||
|
||||
// success
|
||||
{
|
||||
ultCsr->checkGpuHangDetectedCalled = 0;
|
||||
ultCsr->forceReturnGpuHang = false;
|
||||
forceFail = false;
|
||||
callCounter = 0;
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->hostSynchronize(std::numeric_limits<uint64_t>::max()));
|
||||
|
||||
EXPECT_EQ(failCounter, callCounter);
|
||||
EXPECT_EQ(failCounter - 1, ultCsr->checkGpuHangDetectedCalled);
|
||||
EXPECT_EQ(1u, *hostAddress);
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDoingCpuCopyThenSynchronize, IsAtLeastXeHpCore) {
|
||||
|
||||
Reference in New Issue
Block a user