Propagate exec buffer error to L0 API level on Xe HPC

This change opens the DRM file in non-blocking mode for prelim kernels.
In that mode, when the exec buffer ioctl fails with EAGAIN
(aka EWOULDBLOCK), the error can be returned to the API level.

Related-To: NEO-7144

Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2022-10-28 09:25:16 +00:00
committed by Compute-Runtime-Automation
parent a9ba581d97
commit 9816f815f3
26 changed files with 312 additions and 40 deletions

View File

@@ -1117,6 +1117,9 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::handleSubmissionAndCompletionResults(
if (submitRet == NEO::SubmissionStatus::OUT_OF_MEMORY) {
completionRet = ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}
if (submitRet == NEO::SubmissionStatus::OUT_OF_HOST_MEMORY) {
completionRet = ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
}
}
return completionRet;

View File

@@ -92,6 +92,9 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
// Mocked submission: records the residency container it was called with and,
// when a forced status has been configured, returns that status without
// calling the base-class implementation.
NEO::SubmissionStatus submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr, bool isCooperative) override {
    // Always snapshot the container, regardless of which path is taken below.
    residencyContainerSnapshot = residencyContainer;

    // No override configured: fall through to the base-class submission.
    if (!submitBatchBufferReturnValue) {
        return BaseClass::submitBatchBuffer(offset, residencyContainer, endingCmdPtr, isCooperative);
    }

    // A test injected a status; report it instead of delegating.
    return submitBatchBufferReturnValue.value();
}
@@ -99,6 +102,7 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
// Copy of the residency container passed to the most recent submitBatchBuffer call.
NEO::ResidencyContainer residencyContainerSnapshot;
// NOTE(review): presumably the result reported by a mocked synchronize(); that override is not visible here - confirm.
ze_result_t synchronizeReturnValue{ZE_RESULT_SUCCESS};
// NOTE(review): presumably an optional forced result for the linear-stream reservation path; usage is not visible here - confirm.
std::optional<NEO::WaitStatus> reserveLinearStreamSizeReturnValue{};
// When set, submitBatchBuffer returns this status instead of delegating to BaseClass::submitBatchBuffer.
std::optional<NEO::SubmissionStatus> submitBatchBufferReturnValue{};
};
struct Deleter {
@@ -108,4 +112,4 @@ struct Deleter {
};
} // namespace ult
} // namespace L0
} // namespace L0

View File

@@ -0,0 +1,12 @@
#
# Copyright (C) 2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Add the command queue tests in this directory to ${TARGET_NAME} only when
# CMake reports a UNIX platform; on other platforms no sources are added.
# NOTE(review): CMakeLists.txt is listed as a source too, presumably so it
# shows up in generated IDE projects - confirm against sibling directories.
if(UNIX)
target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_linux_tests.cpp
)
endif()

View File

@@ -0,0 +1,74 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/helpers/ult_hw_config.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/libult/create_command_stream.h"
#include "shared/test/common/libult/linux/drm_mock.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
namespace L0 {
namespace ult {
// Fixture that builds the device through prepareDeviceEnvironments with the
// mocked prepare function, the OS-agnostic memory manager override and the
// non-HW CSR all switched off, then hands the resulting execution environment
// to the base DeviceFixture.
struct CommandQueueLinuxTests : public Test<DeviceFixture> {
    void SetUp() override {
        // The backup object is local to SetUp; presumably it restores the
        // previous ultHwConfig when it goes out of scope - confirm against
        // VariableBackup.
        VariableBackup<UltHwConfig> ultHwConfigBackup(&ultHwConfig);
        ultHwConfig.forceOsAgnosticMemoryManager = false;
        ultHwConfig.useHwCsr = true;
        ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false;

        // NOTE(review): allocated with raw new and passed on by reference;
        // ownership is presumably taken over by the fixture - confirm.
        auto *execEnv = new NEO::ExecutionEnvironment();
        prepareDeviceEnvironments(*execEnv);
        execEnv->initializeMemoryManager();
        setupWithExecutionEnvironment(*execEnv);
    }
};
// Verifies that a failing exec buffer call (result -1 with errno EWOULDBLOCK
// configured on the DRM mock) makes executeCommandLists return
// ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY on Xe HPC.
HWTEST2_F(CommandQueueLinuxTests, givenExecBufferErrorOnXeHpcWhenExecutingCommandListsThenOutOfHostMemoryIsReturned, IsXeHpcCore) {
    // Configure the DRM mock: exec buffer reports -1 and the mock-provided
    // errno value (instead of the base errno) is EWOULDBLOCK.
    auto drm = neoDevice->getRootDeviceEnvironment().osInterface->getDriverModel()->as<DrmMock>();
    drm->execBufferResult = -1;
    drm->baseErrno = false;
    drm->errnoRetVal = EWOULDBLOCK;

    const ze_command_queue_desc_t desc = {};
    ze_result_t returnValue;
    auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
                                                          device,
                                                          neoDevice->getDefaultEngine().commandStreamReceiver,
                                                          &desc,
                                                          false,
                                                          false,
                                                          returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    // The queue is dereferenced below, so a failed creation must stop the test
    // here. Nothing else has been allocated yet, so bailing out leaks nothing.
    ASSERT_NE(nullptr, commandQueue);

    // The kernel needs an ISA allocation; it is released manually at the end
    // of the test through the memory manager.
    Mock<Kernel> kernel;
    kernel.immutableData.isaGraphicsAllocation.reset(neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
        {device->getRootDeviceIndex(), MemoryConstants::pageSize, NEO::AllocationType::KERNEL_ISA, neoDevice->getDeviceBitfield()}));
    kernel.immutableData.device = device;

    auto commandList = std::unique_ptr<CommandList>(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    ASSERT_NE(nullptr, commandList);

    ze_group_count_t dispatchFunctionArguments{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};
    commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);

    ze_command_list_handle_t cmdListHandles[1] = {commandList->toHandle()};
    returnValue = commandQueue->executeCommandLists(1, cmdListHandles, nullptr, false);
    EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, returnValue);

    commandQueue->destroy();
    // Hand the ISA allocation back to the memory manager rather than letting
    // the kernel's smart pointer dispose of it.
    neoDevice->getMemoryManager()->freeGraphicsMemory(kernel.immutableData.isaGraphicsAllocation.release());
}
} // namespace ult
} // namespace L0

View File

@@ -357,6 +357,30 @@ HWTEST2_F(CommandQueueCreate, givenLogicalStateHelperAndImmediateCmdListWhenExec
commandQueue->destroy();
}
// Verifies that an OUT_OF_HOST_MEMORY submission status injected into the mock
// queue surfaces as ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY from executeCommandLists.
HWTEST2_F(CommandQueueCreate, givenOutOfHostMemoryErrorFromSubmitBatchBufferWhenExecutingCommandListsThenOutOfHostMemoryIsReturned, IsAtLeastSkl) {
    const ze_command_queue_desc_t queueDesc = {};
    auto *mockQueue = new MockCommandQueueHw<gfxCoreFamily>(device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc);
    mockQueue->initialize(false, false);
    // Inject the submission status the queue should report.
    mockQueue->submitBatchBufferReturnValue = NEO::SubmissionStatus::OUT_OF_HOST_MEMORY;

    Mock<Kernel> mockKernel;
    mockKernel.immutableData.device = device;

    ze_result_t createStatus;
    std::unique_ptr<CommandList> cmdList(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, createStatus)));
    ASSERT_NE(nullptr, cmdList);

    // Record a single 1x1x1 kernel launch so there is something to submit.
    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams kernelLaunchParams = {};
    cmdList->appendLaunchKernel(mockKernel.toHandle(), &groupCount, nullptr, 0, nullptr, kernelLaunchParams);

    ze_command_list_handle_t handles[] = {cmdList->toHandle()};
    const auto executeStatus = mockQueue->executeCommandLists(1, handles, nullptr, false);
    EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, executeStatus);

    mockQueue->destroy();
}
HWTEST2_F(CommandQueueCreate, givenGpuHangInReservingLinearStreamWhenExecutingCommandListsThenDeviceLostIsReturned, IsSKL) {
const ze_command_queue_desc_t desc = {};
MockCommandQueueHw<gfxCoreFamily> commandQueue(device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc);