Prepare mechanism for returning GPU execution error on OCL API

Translate error task count values (gpuHang, outOfDeviceMemory, outOfHostMemory) to OCL error codes.

Related-To: NEO-7412
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2022-11-04 13:57:42 +00:00
committed by Compute-Runtime-Automation
parent ab6af4247e
commit 1c3d5c3892
22 changed files with 71 additions and 53 deletions

View File

@@ -60,6 +60,18 @@ CommandQueue *CommandQueue::create(Context *context,
return funcCreate(context, device, properties, internalUsage);
}
// Converts a task count value into the OpenCL status code reported to the caller.
//
// taskCount - value to translate; the CompletionStamp sentinels handled here are
//             gpuHang, outOfDeviceMemory and outOfHostMemory.
//
// Returns CL_OUT_OF_HOST_MEMORY for CompletionStamp::outOfHostMemory,
// CL_OUT_OF_RESOURCES for CompletionStamp::gpuHang and
// CompletionStamp::outOfDeviceMemory, and CL_SUCCESS for every other value.
cl_int CommandQueue::getErrorCodeFromTaskCount(uint32_t taskCount) {
    if (taskCount == CompletionStamp::outOfHostMemory) {
        return CL_OUT_OF_HOST_MEMORY;
    }

    // A GPU hang and device memory exhaustion share a single OpenCL status code.
    const bool deviceSideFailure = (taskCount == CompletionStamp::gpuHang) ||
                                   (taskCount == CompletionStamp::outOfDeviceMemory);
    return deviceSideFailure ? CL_OUT_OF_RESOURCES : CL_SUCCESS;
}
CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_properties *properties, bool internalUsage)
: context(context), device(device), isInternalUsage(internalUsage) {
if (context) {

View File

@@ -58,6 +58,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
bool internalUsage,
cl_int &errcodeRet);
static cl_int getErrorCodeFromTaskCount(uint32_t taskCount);
CommandQueue() = delete;
CommandQueue(Context *context, ClDevice *device, const cl_queue_properties *properties, bool internalUsage);

View File

@@ -335,8 +335,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
this->latestSentEnqueueType = enqueueProperties.operation;
}
if (completionStamp.taskCount == CompletionStamp::gpuHang) {
return CL_OUT_OF_RESOURCES;
if (completionStamp.taskCount > CompletionStamp::notReady) {
return CommandQueue::getErrorCodeFromTaskCount(completionStamp.taskCount);
}
updateFromCompletionStamp(completionStamp, eventBuilder.getEvent());
@@ -825,14 +825,14 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
if (enqueueProperties.blitPropertiesContainer->size() > 0) {
auto bcsCsr = getBcsForAuxTranslation();
const auto newTaskCount = bcsCsr->flushBcsTask(*enqueueProperties.blitPropertiesContainer, false, this->isProfilingEnabled(), getDevice());
if (!newTaskCount) {
if (newTaskCount > CompletionStamp::notReady) {
CompletionStamp completionStamp{};
completionStamp.taskCount = CompletionStamp::gpuHang;
completionStamp.taskCount = newTaskCount;
return completionStamp;
}
this->updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), *newTaskCount);
this->updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), newTaskCount);
dispatchFlags.implicitFlush = true;
}
@@ -1063,14 +1063,14 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
if (enqueueProperties.operation == EnqueueProperties::Operation::Blit) {
UNRECOVERABLE_IF(!enqueueProperties.blitPropertiesContainer);
const auto newTaskCount = bcsCsr->flushBcsTask(*enqueueProperties.blitPropertiesContainer, false, this->isProfilingEnabled(), getDevice());
if (!newTaskCount) {
if (newTaskCount > CompletionStamp::notReady) {
CompletionStamp completionStamp{};
completionStamp.taskCount = CompletionStamp::gpuHang;
completionStamp.taskCount = newTaskCount;
return completionStamp;
}
this->updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), *newTaskCount);
this->updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), newTaskCount);
}
return completionStamp;
@@ -1276,8 +1276,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
completionStamp = enqueueCommandWithoutKernel(nullptr, 0, gpgpuCommandStream, gpgpuCommandStreamStart, blocking,
enqueueProperties, timestampPacketDependencies, eventsRequest,
eventBuilder, taskLevel, csrDeps, &bcsCsr);
if (completionStamp.taskCount == CompletionStamp::gpuHang) {
return CL_OUT_OF_RESOURCES;
if (completionStamp.taskCount > CompletionStamp::notReady) {
return CommandQueue::getErrorCodeFromTaskCount(completionStamp.taskCount);
}
if (gpgpuSubmission) {

View File

@@ -597,7 +597,7 @@ void Event::submitCommand(bool abortTasks) {
setEndTimeStamp();
}
if (complStamp.taskCount == CompletionStamp::gpuHang) {
if (complStamp.taskCount > CompletionStamp::notReady) {
abortExecutionDueToGpuHang();
return;
}

View File

@@ -258,14 +258,12 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
commandQueue.clearLastBcsPackets();
}
bool isGpuHangDetected{false};
if (kernelOperation->blitPropertiesContainer.size() > 0) {
const auto newTaskCount = bcsCsrForAuxTranslation->flushBcsTask(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice());
if (newTaskCount) {
commandQueue.updateBcsTaskCount(bcsCsrForAuxTranslation->getOsContext().getEngineType(), *newTaskCount);
if (newTaskCount <= CompletionStamp::notReady) {
commandQueue.updateBcsTaskCount(bcsCsrForAuxTranslation->getOsContext().getEngineType(), newTaskCount);
} else {
isGpuHangDetected = true;
completionStamp.taskCount = newTaskCount;
}
}
commandQueue.updateLatestSentEnqueueType(EnqueueProperties::Operation::GpuKernel);
@@ -277,11 +275,11 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
if (printfHandler) {
const auto waitStatus = commandQueue.waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false);
if (waitStatus == WaitStatus::GpuHang) {
isGpuHangDetected = true;
completionStamp.taskCount = CompletionStamp::gpuHang;
}
if (!printfHandler->printEnqueueOutput()) {
isGpuHangDetected = true;
completionStamp.taskCount = CompletionStamp::gpuHang;
}
}
@@ -290,14 +288,10 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
}
surfaces.clear();
if (isGpuHangDetected) {
completionStamp.taskCount = CompletionStamp::gpuHang;
}
return completionStamp;
}
bool CommandWithoutKernel::dispatchBlitOperation() {
uint32_t CommandWithoutKernel::dispatchBlitOperation() {
auto bcsCsr = kernelOperation->bcsCsr;
UNRECOVERABLE_IF(bcsCsr == nullptr);
@@ -314,14 +308,14 @@ bool CommandWithoutKernel::dispatchBlitOperation() {
}
const auto newTaskCount = bcsCsr->flushBcsTask(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice());
if (!newTaskCount) {
return false;
if (newTaskCount > CompletionStamp::notReady) {
return newTaskCount;
}
commandQueue.updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), *newTaskCount);
commandQueue.updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), newTaskCount);
commandQueue.setLastBcsPacket(bcsCsr->getOsContext().getEngineType());
return true;
return newTaskCount;
}
CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminated) {
@@ -420,8 +414,9 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
}
if (kernelOperation->blitEnqueue) {
if (!dispatchBlitOperation()) {
completionStamp.taskCount = CompletionStamp::gpuHang;
auto taskCount = dispatchBlitOperation();
if (taskCount > CompletionStamp::notReady) {
completionStamp.taskCount = taskCount;
}
}

View File

@@ -156,6 +156,6 @@ class CommandWithoutKernel : public Command {
public:
using Command::Command;
CompletionStamp &submit(uint32_t taskLevel, bool terminated) override;
bool dispatchBlitOperation();
uint32_t dispatchBlitOperation();
};
} // namespace NEO

View File

@@ -98,7 +98,7 @@ bool PrintfHandler::printEnqueueOutput() {
0, 0, 0, Vec3<size_t>(printfOutputSize, 0, 0), 0, 0, 0, 0));
const auto newTaskCount = bcsEngine.commandStreamReceiver->flushBcsTask(blitPropertiesContainer, true, false, device);
if (!newTaskCount) {
if (newTaskCount > CompletionStamp::notReady) {
return false;
}
}

View File

@@ -113,7 +113,7 @@ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenGpuHangOnFlushBcsAndBlitAuxTran
auto ultBcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsCsr);
ultBcsCsr->callBaseFlushBcsTask = false;
ultBcsCsr->flushBcsTaskReturnValue = std::nullopt;
ultBcsCsr->flushBcsTaskReturnValue = CompletionStamp::gpuHang;
auto mockCmdQ = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
@@ -666,7 +666,7 @@ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenGpuHangOnFlushBcsTaskAndBlitTra
auto ultBcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsCsr);
ultBcsCsr->callBaseFlushBcsTask = false;
ultBcsCsr->flushBcsTaskReturnValue = std::nullopt;
ultBcsCsr->flushBcsTaskReturnValue = CompletionStamp::gpuHang;
UserEvent userEvent;
cl_event waitlist[] = {&userEvent};
@@ -943,7 +943,7 @@ HWTEST_TEMPLATED_F(BlitEnqueueWithDebugCapabilityTests, givenDebugFlagSetWhenDis
HWTEST_TEMPLATED_F(BlitEnqueueWithDebugCapabilityTests, givenGpuHangOnFlushBcsTaskAndDebugFlagSetWhenDispatchingBlitEnqueueThenOutOfResourcesIsReturned) {
auto ultBcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsCsr);
ultBcsCsr->callBaseFlushBcsTask = false;
ultBcsCsr->flushBcsTaskReturnValue = std::nullopt;
ultBcsCsr->flushBcsTaskReturnValue = CompletionStamp::gpuHang;
buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
@@ -1194,7 +1194,7 @@ HWTEST_TEMPLATED_F(BlitEnqueueFlushTests, givenGpuHangOnFlushBcsTaskAndBlockedQu
auto myUltBcsCsr = static_cast<MyUltCsr<FamilyType> *>(bcsCsr);
myUltBcsCsr->flushCounter = &flushCounter;
myUltBcsCsr->callBaseFlushBcsTask = false;
myUltBcsCsr->flushBcsTaskReturnValue = std::nullopt;
myUltBcsCsr->flushBcsTaskReturnValue = CompletionStamp::gpuHang;
UserEvent userEvent;
cl_event waitlist[] = {&userEvent};

View File

@@ -128,6 +128,13 @@ INSTANTIATE_TEST_CASE_P(CommandQueue,
CommandQueueTest,
::testing::ValuesIn(AllCommandQueueProperties));
// Checks the task-count-to-OpenCL-status translation for each handled sentinel
// and for a value that is not one of the sentinels.
TEST(CommandQueue, WhenGettingErrorCodeFromTaskCountThenProperValueIsReturned) {
    // Sentinels that translate to CL_OUT_OF_RESOURCES.
    EXPECT_EQ(CL_OUT_OF_RESOURCES, CommandQueue::getErrorCodeFromTaskCount(CompletionStamp::gpuHang));
    EXPECT_EQ(CL_OUT_OF_RESOURCES, CommandQueue::getErrorCodeFromTaskCount(CompletionStamp::outOfDeviceMemory));

    // Sentinel that translates to CL_OUT_OF_HOST_MEMORY.
    EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, CommandQueue::getErrorCodeFromTaskCount(CompletionStamp::outOfHostMemory));

    // A value that is none of the sentinels above yields CL_SUCCESS.
    const uint32_t nonSentinelTaskCount = 0u;
    EXPECT_EQ(CL_SUCCESS, CommandQueue::getErrorCodeFromTaskCount(nonSentinelTaskCount));
}
TEST(CommandQueue, WhenConstructingCommandQueueThenTaskLevelAndTaskCountAreZero) {
MockCommandQueue cmdQ(nullptr, nullptr, 0, false);
EXPECT_EQ(0u, cmdQ.taskLevel);

View File

@@ -650,12 +650,12 @@ HWTEST_F(BcsTests, GivenNoneGpuHangWhenBlitFromHostPtrCalledThenCallWaitWithKmdF
0, 0, {1, 1, 1}, 0, 0, 0, 0);
const auto taskCount1 = flushBcsTask(myMockCsr.get(), blitProperties, false, *pDevice);
EXPECT_TRUE(taskCount1.has_value());
EXPECT_NE(CompletionStamp::gpuHang, taskCount1);
EXPECT_EQ(0u, myMockCsr->waitForTaskCountWithKmdNotifyFallbackCalled);
const auto taskCount2 = flushBcsTask(myMockCsr.get(), blitProperties, true, *pDevice);
EXPECT_TRUE(taskCount2.has_value());
EXPECT_NE(CompletionStamp::gpuHang, taskCount2);
EXPECT_EQ(1u, myMockCsr->waitForTaskCountWithKmdNotifyFallbackCalled);
EXPECT_EQ(myMockCsr->taskCount, myMockCsr->taskCountToWaitPassed);
@@ -687,14 +687,14 @@ HWTEST_F(BcsTests, GivenGpuHangWhenBlitFromHostPtrCalledThenCallWaitWithKmdFallb
0, 0, {1, 1, 1}, 0, 0, 0, 0);
const auto taskCount1 = flushBcsTask(myMockCsr.get(), blitProperties, false, *pDevice);
EXPECT_TRUE(taskCount1.has_value());
EXPECT_NE(CompletionStamp::gpuHang, taskCount1);
EXPECT_EQ(0u, myMockCsr->waitForTaskCountWithKmdNotifyFallbackCalled);
myMockCsr->waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::GpuHang;
const auto taskCount2 = flushBcsTask(myMockCsr.get(), blitProperties, true, *pDevice);
EXPECT_FALSE(taskCount2.has_value());
EXPECT_EQ(CompletionStamp::gpuHang, taskCount2);
EXPECT_EQ(1u, myMockCsr->waitForTaskCountWithKmdNotifyFallbackCalled);
EXPECT_EQ(myMockCsr->taskCount, myMockCsr->taskCountToWaitPassed);

View File

@@ -22,7 +22,7 @@ struct BcsTests : public Test<ClDeviceFixture> {
Test<ClDeviceFixture>::TearDown();
}
std::optional<uint32_t> flushBcsTask(CommandStreamReceiver *bcsCsr, const BlitProperties &blitProperties, bool blocking, Device &device) {
uint32_t flushBcsTask(CommandStreamReceiver *bcsCsr, const BlitProperties &blitProperties, bool blocking, Device &device) {
BlitPropertiesContainer container;
container.push_back(blitProperties);

View File

@@ -559,7 +559,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ContextCreateTests, givenGpuHangOnFlushBcsTaskAndLo
auto ultBcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsEngine->commandStreamReceiver);
ultBcsCsr->callBaseFlushBcsTask = false;
ultBcsCsr->flushBcsTaskReturnValue = std::nullopt;
ultBcsCsr->flushBcsTaskReturnValue = CompletionStamp::gpuHang;
EXPECT_EQ(BlitOperationResult::GpuHang, BlitHelper::blitMemoryToAllocation(buffer->getContext()->getDevice(0)->getDevice(), memory, buffer->getOffset(), hostMemory, {1, 1, 1}));
}

View File

@@ -543,7 +543,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver {
WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, QueueThrottle throttle) override {
return WaitStatus::Ready;
}
std::optional<uint32_t> flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; };
uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; };
CompletionStamp flushTask(
LinearStream &commandStream,

View File

@@ -183,7 +183,7 @@ HWTEST_F(PrintfHandlerTests, givenGpuHangOnFlushBcsStreamAndEnabledStatelessComp
auto &bcsEngine = device->getEngine(EngineHelpers::getBcsEngineType(device->getHardwareInfo(), device->getDeviceBitfield(), device->getSelectorCopyEngine(), true), EngineUsage::Regular);
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsEngine.commandStreamReceiver);
bcsCsr->callBaseFlushBcsTask = false;
bcsCsr->flushBcsTaskReturnValue = std::nullopt;
bcsCsr->flushBcsTaskReturnValue = CompletionStamp::gpuHang;
EXPECT_FALSE(printfHandler->printEnqueueOutput());
EXPECT_EQ(1u, bcsCsr->blitBufferCalled);

View File

@@ -233,7 +233,7 @@ class CommandStreamReceiver {
this->latestFlushedTaskCount = latestFlushedTaskCount;
}
virtual std::optional<uint32_t> flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) = 0;
virtual uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) = 0;
virtual void flushTagUpdate() = 0;
virtual void updateTagFromWait() = 0;

View File

@@ -97,7 +97,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
return CommandStreamReceiverType::CSR_HW;
}
std::optional<uint32_t> flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override;
uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override;
void flushTagUpdate() override;
void flushMiFlushDW();

View File

@@ -1057,7 +1057,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::unregisterDirectSubmissionFromCo
}
template <typename GfxFamily>
std::optional<uint32_t> CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) {
uint32_t CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) {
using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW;
@@ -1180,7 +1180,7 @@ std::optional<uint32_t> CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const B
internalAllocationStorage->cleanAllocationList(newTaskCount, TEMPORARY_ALLOCATION);
if (waitStatus == WaitStatus::GpuHang) {
return std::nullopt;
return CompletionStamp::gpuHang;
}
}

View File

@@ -211,7 +211,7 @@ BlitOperationResult BlitHelper::blitMemoryToAllocationBanks(const Device &device
0, 0, 0, size, 0, 0, 0, 0));
const auto newTaskCount = bcsEngine->commandStreamReceiver->flushBcsTask(blitPropertiesContainer, true, false, *pDeviceForBlit);
if (!newTaskCount) {
if (newTaskCount == CompletionStamp::gpuHang) {
return BlitOperationResult::GpuHang;
}
}

View File

@@ -18,6 +18,8 @@ struct CompletionStamp {
static constexpr uint32_t notReady = 0xFFFFFFF0;
static constexpr uint32_t gpuHang = 0xFFFFFFFA;
static constexpr uint32_t outOfDeviceMemory = 0xFFFFFFFB;
static constexpr uint32_t outOfHostMemory = 0xFFFFFFFC;
};
} // namespace NEO

View File

@@ -49,7 +49,7 @@ class UltAubCommandStreamReceiver : public AUBCommandStreamReceiverHw<GfxFamily>
return csr;
}
std::optional<uint32_t> flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
blitBufferCalled++;
return BaseClass::flushBcsTask(blitPropertiesContainer, blocking, profilingEnabled, device);
}

View File

@@ -285,7 +285,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
return CommandStreamReceiverHw<GfxFamily>::obtainUniqueOwnership();
}
std::optional<uint32_t> flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
blitBufferCalled++;
receivedBlitProperties = blitPropertiesContainer;
@@ -392,7 +392,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
WaitStatus returnWaitForCompletionWithTimeout = WaitStatus::Ready;
std::optional<WaitStatus> waitForTaskCountWithKmdNotifyFallbackReturnValue{};
bool callBaseFlushBcsTask{true};
std::optional<uint32_t> flushBcsTaskReturnValue{};
uint32_t flushBcsTaskReturnValue{};
};
} // namespace NEO

View File

@@ -119,7 +119,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
return WaitStatus::Ready;
}
std::optional<uint32_t> flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; };
uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; };
CommandStreamReceiverType getType() override {
return CommandStreamReceiverType::CSR_HW;
@@ -305,7 +305,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
return completionStamp;
}
std::optional<uint32_t> flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
if (!skipBlitCalls) {
return CommandStreamReceiverHw<GfxFamily>::flushBcsTask(blitPropertiesContainer, blocking, profilingEnabled, device);
}