mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-20 00:24:58 +08:00
feature: CPU copy path for in-order CommandList
Related-To: NEO-7966
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
5a908f6634
commit
7b207d5e11
@@ -40,6 +40,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using BaseClass = CommandListCoreFamily<gfxCoreFamily>;
|
||||
using BaseClass::BaseClass;
|
||||
using BaseClass::copyThroughLockedPtrEnabled;
|
||||
using BaseClass::executeCommandListImmediate;
|
||||
using BaseClass::isCopyOnly;
|
||||
using BaseClass::isInOrderExecutionEnabled;
|
||||
@@ -174,6 +175,8 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
using BaseClass::inOrderDependencyCounterAllocation;
|
||||
|
||||
void printKernelsPrintfOutput(bool hangDetected);
|
||||
ze_result_t synchronizeInOrderExecution() const;
|
||||
|
||||
MOCKABLE_VIRTUAL void checkAssert();
|
||||
std::atomic<bool> dependenciesPresent{false};
|
||||
};
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
#include "shared/source/memory_manager/internal_allocation_storage.h"
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
#include "shared/source/utilities/wait_util.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
|
||||
#include "level_zero/core/source/cmdqueue/cmdqueue_hw.h"
|
||||
@@ -787,6 +788,10 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(cons
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
this->dependenciesPresent = false; // wait only for waitlist and in-order sync value
|
||||
}
|
||||
|
||||
if (numWaitEvents > 0) {
|
||||
uint32_t numEventsThreshold = 5;
|
||||
if (NEO::DebugManager.flags.ExperimentalCopyThroughLockWaitlistSizeThreshold.get() != -1) {
|
||||
@@ -826,6 +831,13 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(cons
|
||||
this->dependenciesPresent = false;
|
||||
}
|
||||
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
auto status = synchronizeInOrderExecution();
|
||||
if (status != ZE_RESULT_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
if (signalEvent) {
|
||||
signalEvent->setGpuStartTimestamp();
|
||||
}
|
||||
@@ -1028,4 +1040,24 @@ bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isRelaxedOrderingDispatchAll
|
||||
return NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numEvents);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::synchronizeInOrderExecution() const {
|
||||
auto hostAddress = static_cast<uint32_t *>(this->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
|
||||
auto waitValue = this->inOrderDependencyCounter;
|
||||
|
||||
auto lastHangCheckTime = std::chrono::high_resolution_clock::now();
|
||||
|
||||
while (*hostAddress < waitValue) {
|
||||
this->csr->downloadAllocation(*this->inOrderDependencyCounterAllocation);
|
||||
|
||||
bool status = NEO::WaitUtils::waitFunctionWithPredicate<const uint32_t>(hostAddress, waitValue, std::greater_equal<uint32_t>());
|
||||
|
||||
if (!status && this->csr->checkGpuHangDetected(std::chrono::high_resolution_clock::now(), lastHangCheckTime)) {
|
||||
return ZE_RESULT_ERROR_DEVICE_LOST;
|
||||
}
|
||||
}
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -173,6 +173,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
|
||||
using BaseClass::signalAllEventPackets;
|
||||
using BaseClass::stateBaseAddressTracking;
|
||||
using BaseClass::stateComputeModeTracking;
|
||||
using BaseClass::synchronizeInOrderExecution;
|
||||
|
||||
WhiteBox() : BaseClass(BaseClass::defaultNumIddsPerBlock) {}
|
||||
};
|
||||
|
||||
@@ -1115,6 +1115,107 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierThenS
|
||||
EXPECT_EQ(2u, pcCmd->getImmediateData());
|
||||
}
|
||||
|
||||
// Checks that synchronizeInOrderExecution() keeps polling while the in-order counter is
// below the awaited value, asks the CSR for a hang check after each unsuccessful wait,
// and reports success once the counter is updated.
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompletion, IsAtLeastXeHpCore) {
    auto immCmdList = createImmCmdList<gfxCoreFamily>();

    auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);

    immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);

    // Reset the counter as seen from the host so the wait cannot complete immediately.
    auto hostAddress = static_cast<uint32_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
    *hostAddress = 0;

    const uint32_t failCounter = 3;
    uint32_t callCounter = 0;

    // The counter becomes visible only on the failCounter-th download.
    ultCsr->downloadAllocationImpl = [&](GraphicsAllocation &graphicsAllocation) {
        callCounter++;
        if (callCounter >= failCounter) {
            *hostAddress = 1;
        }
    };

    // The returned status was previously discarded; verify it explicitly.
    EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->synchronizeInOrderExecution());

    EXPECT_EQ(failCounter, callCounter);
    // One hang check per unsuccessful wait, i.e. every iteration except the last one.
    EXPECT_EQ(failCounter - 1, ultCsr->checkGpuHangDetectedCalled);
    EXPECT_EQ(1u, *hostAddress);
}
|
||||
|
||||
// With copyThroughLockedPtrEnabled set, an appendMemoryCopy issued after a kernel launch is
// expected to poll the in-order counter (downloads + hang checks) without any task-count
// wait or tag-update flush on the CSR.
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDoingCpuCopyThenSynchronize, IsAtLeastXeHpCore) {
    auto immCmdList = createImmCmdList<gfxCoreFamily>();
    immCmdList->copyThroughLockedPtrEnabled = true;
    auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);

    auto pool = createEvents(1, false);

    auto signalAndWaitEvent = events[0]->toHandle();

    auto counterHostPtr = static_cast<uint32_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
    *counterHostPtr = 0;

    const uint32_t downloadsUntilUpdate{3};
    uint32_t downloadCalls{0};

    // From the third download onwards each call bumps the counter seen by the host.
    ultCsr->downloadAllocationImpl = [&](GraphicsAllocation &) {
        ++downloadCalls;
        if (downloadCalls >= downloadsUntilUpdate) {
            ++(*counterHostPtr);
        }
    };

    immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, signalAndWaitEvent, 0, nullptr, launchParams, false);
    events[0]->setIsCompleted();

    // Clear CSR bookkeeping so the expectations below cover only the copy.
    ultCsr->waitForCompletionWithTimeoutTaskCountCalled = 0;
    ultCsr->flushTagUpdateCalled = false;

    void *deviceAlloc = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto allocStatus = context->allocDeviceMem(device->toHandle(), &deviceDesc, 128, 128, &deviceAlloc);
    ASSERT_EQ(allocStatus, ZE_RESULT_SUCCESS);

    uint32_t hostCopyData = 0;

    immCmdList->appendMemoryCopy(deviceAlloc, &hostCopyData, 1, nullptr, 1, &signalAndWaitEvent, false);

    EXPECT_EQ(3u, downloadCalls);
    EXPECT_EQ(1u, *counterHostPtr);
    EXPECT_EQ(2u, ultCsr->checkGpuHangDetectedCalled);
    EXPECT_EQ(0u, ultCsr->waitForCompletionWithTimeoutTaskCountCalled);
    EXPECT_FALSE(ultCsr->flushTagUpdateCalled);

    context->freeMem(deviceAlloc);
}
|
||||
|
||||
// When the mocked CSR is forced to report a GPU hang, an appendMemoryCopy issued with
// copyThroughLockedPtrEnabled set must return ZE_RESULT_ERROR_DEVICE_LOST.
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenGpuHangDetectedInCpuCopyPathThenReportError, IsAtLeastXeHpCore) {
    auto immCmdList = createImmCmdList<gfxCoreFamily>();
    immCmdList->copyThroughLockedPtrEnabled = true;

    auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);

    // Zero the counter as seen from the host before the kernel is appended.
    auto counterHostPtr = static_cast<uint32_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
    *counterHostPtr = 0;

    immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);

    void *deviceAlloc = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto allocStatus = context->allocDeviceMem(device->toHandle(), &deviceDesc, 128, 128, &deviceAlloc);
    ASSERT_EQ(allocStatus, ZE_RESULT_SUCCESS);

    uint32_t hostCopyData = 0;

    // The mock reports a hang only while this flag is set.
    ultCsr->forceReturnGpuHang = true;

    auto copyStatus = immCmdList->appendMemoryCopy(deviceAlloc, &hostCopyData, 1, nullptr, 0, nullptr, false);
    EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, copyStatus);

    ultCsr->forceReturnGpuHang = false;

    context->freeMem(deviceAlloc);
}
|
||||
|
||||
struct CommandListAppendLaunchKernelWithImplicitArgs : CommandListAppendLaunchKernel {
|
||||
template <typename FamilyType>
|
||||
uint64_t getIndirectHeapOffsetForImplicitArgsBuffer(const Mock<::L0::Kernel> &kernel) {
|
||||
|
||||
@@ -379,6 +379,15 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
pollForCompletionCalled++;
|
||||
}
|
||||
|
||||
// Mock override: counts every call and reports a hang unconditionally while
// forceReturnGpuHang is set; otherwise defers to the base implementation.
bool checkGpuHangDetected(CommandStreamReceiver::TimeType currentTime, CommandStreamReceiver::TimeType &lastHangCheckTime) const override {
    ++checkGpuHangDetectedCalled;

    // Short-circuit keeps the base implementation from running when a hang is forced.
    return forceReturnGpuHang || BaseClass::checkGpuHangDetected(currentTime, lastHangCheckTime);
}
|
||||
|
||||
SubmissionStatus sendRenderStateCacheFlush() override {
|
||||
if (callBaseSendRenderStateCacheFlush) {
|
||||
return BaseClass::sendRenderStateCacheFlush();
|
||||
@@ -404,6 +413,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
uint32_t initDirectSubmissionCalled = 0;
|
||||
uint32_t fillReusableAllocationsListCalled = 0;
|
||||
uint32_t pollForCompletionCalled = 0;
|
||||
mutable uint32_t checkGpuHangDetectedCalled = 0;
|
||||
int ensureCommandBufferAllocationCalled = 0;
|
||||
DispatchFlags recordedDispatchFlags;
|
||||
BlitPropertiesContainer receivedBlitProperties = {};
|
||||
@@ -435,6 +445,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
bool callBaseFillReusableAllocationsList = false;
|
||||
bool callBaseFlushBcsTask{true};
|
||||
bool callBaseSendRenderStateCacheFlush = true;
|
||||
bool forceReturnGpuHang = false;
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
Reference in New Issue
Block a user