fix: improve the handling of private surface allocation failures

Related-To: NEO-12664

Signed-off-by: Wenbin Lu <wenbin.lu@intel.com>
This commit is contained in:
Wenbin Lu 2024-10-10 17:04:59 +00:00 committed by Compute-Runtime-Automation
parent 227ce579fe
commit a82b5182f6
2 changed files with 41 additions and 2 deletions

View File

@ -10,6 +10,7 @@
#include "shared/source/assert_handler/assert_handler.h" #include "shared/source/assert_handler/assert_handler.h"
#include "shared/source/command_container/implicit_scaling.h" #include "shared/source/command_container/implicit_scaling.h"
#include "shared/source/debugger/debugger_l0.h" #include "shared/source/debugger/debugger_l0.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/addressing_mode_helper.h" #include "shared/source/helpers/addressing_mode_helper.h"
@ -23,7 +24,6 @@
#include "shared/source/helpers/local_work_size.h" #include "shared/source/helpers/local_work_size.h"
#include "shared/source/helpers/per_thread_data.h" #include "shared/source/helpers/per_thread_data.h"
#include "shared/source/helpers/ray_tracing_helper.h" #include "shared/source/helpers/ray_tracing_helper.h"
#include "shared/source/helpers/register_offsets.h"
#include "shared/source/helpers/simd_helper.h" #include "shared/source/helpers/simd_helper.h"
#include "shared/source/helpers/string.h" #include "shared/source/helpers/string.h"
#include "shared/source/helpers/surface_format_info.h" #include "shared/source/helpers/surface_format_info.h"
@ -970,7 +970,14 @@ NEO::GraphicsAllocation *KernelImp::allocatePrivateMemoryGraphicsAllocation() {
auto privateMemoryGraphicsAllocation = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties( auto privateMemoryGraphicsAllocation = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
{neoDevice->getRootDeviceIndex(), privateSurfaceSize, NEO::AllocationType::privateSurface, neoDevice->getDeviceBitfield()}); {neoDevice->getRootDeviceIndex(), privateSurfaceSize, NEO::AllocationType::privateSurface, neoDevice->getDeviceBitfield()});
UNRECOVERABLE_IF(privateMemoryGraphicsAllocation == nullptr); if (privateMemoryGraphicsAllocation == nullptr) {
const auto usedLocalMemorySize = neoDevice->getMemoryManager()->getUsedLocalMemorySize(neoDevice->getRootDeviceIndex());
const auto maxGlobalMemorySize = neoDevice->getRootDevice()->getGlobalMemorySize(static_cast<uint32_t>(neoDevice->getDeviceBitfield().to_ulong()));
CREATE_DEBUG_STRING(str, "Failed to allocate private surface of %zu bytes, used local memory %zu, max global memory %zu\n", static_cast<size_t>(privateSurfaceSize), usedLocalMemorySize, static_cast<size_t>(maxGlobalMemorySize));
neoDevice->getRootDeviceEnvironment().executionEnvironment.setErrorDescription(std::string(str.get()));
PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, str.get());
}
return privateMemoryGraphicsAllocation; return privateMemoryGraphicsAllocation;
} }
@ -1137,6 +1144,9 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
auto &kernelAttributes = kernelDescriptor.kernelAttributes; auto &kernelAttributes = kernelDescriptor.kernelAttributes;
if ((kernelAttributes.perHwThreadPrivateMemorySize != 0U) && (false == module->shouldAllocatePrivateMemoryPerDispatch())) { if ((kernelAttributes.perHwThreadPrivateMemorySize != 0U) && (false == module->shouldAllocatePrivateMemoryPerDispatch())) {
this->privateMemoryGraphicsAllocation = allocatePrivateMemoryGraphicsAllocation(); this->privateMemoryGraphicsAllocation = allocatePrivateMemoryGraphicsAllocation();
if (this->privateMemoryGraphicsAllocation == nullptr) {
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}
this->patchCrossthreadDataWithPrivateAllocation(this->privateMemoryGraphicsAllocation); this->patchCrossthreadDataWithPrivateAllocation(this->privateMemoryGraphicsAllocation);
this->internalResidencyContainer.push_back(this->privateMemoryGraphicsAllocation); this->internalResidencyContainer.push_back(this->privateMemoryGraphicsAllocation);
} }

View File

@ -187,6 +187,35 @@ TEST_F(KernelInitTest, givenKernelToInitWhenItHasTooBigScratchSizeThenInvalidBin
EXPECT_EQ(kernel->initialize(&desc), ZE_RESULT_ERROR_INVALID_NATIVE_BINARY); EXPECT_EQ(kernel->initialize(&desc), ZE_RESULT_ERROR_INVALID_NATIVE_BINARY);
} }
// Checks that kernel initialization returns ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY
// when the private surface cannot be allocated, and that a message containing
// "Failed to allocate private surface" is written to stderr while
// PrintDebugMessages is enabled.
// NOTE(review): the test name spells "Returned" as "Retutned"; left unchanged here
// so that existing test filters keep matching.
TEST_F(KernelInitTest, givenKernelToInitWhenPrivateSurfaceAllocationFailsThenOutOfDeviceMemoryIsRetutned) {
    // Restores the debug flags modified below when the test scope ends.
    DebugManagerStateRestore restorer;
    debugManager.flags.PrintDebugMessages.set(true);
    ::testing::internal::CaptureStderr();

    // A non-zero per-HW-thread private memory size is requested for the mock kernel.
    uint32_t privateMemorySizePerHwThread = 32u;
    auto immutableData = std::make_unique<MockImmutableData>(privateMemorySizePerHwThread);
    createModuleFromMockBinary(privateMemorySizePerHwThread, false, immutableData.get());

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    ze_kernel_desc_t desc = {};
    desc.pKernelName = kernelName.c_str();
    immutableData->resizeExplicitArgs(1);

    // Temporarily install a FailMemoryManager for the duration of initialize(),
    // then swap the original manager back in.
    // NOTE(review): the leading 0 presumably means "fail from the first allocation" - confirm.
    auto *executionEnvironment = device->getNEODevice()->getExecutionEnvironment();
    std::unique_ptr<NEO::MemoryManager> swappedMemoryManager = std::make_unique<NEO::FailMemoryManager>(0, *executionEnvironment);
    executionEnvironment->memoryManager.swap(swappedMemoryManager);
    EXPECT_EQ(kernel->initialize(&desc), ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
    executionEnvironment->memoryManager.swap(swappedMemoryManager);

    // The captured stderr must contain the allocation-failure message prefix.
    const std::string expectedMessage = "Failed to allocate private surface";
    const auto capturedStderr = ::testing::internal::GetCapturedStderr();
    EXPECT_NE(std::string::npos, capturedStderr.find(expectedMessage));
}
using KernelBaseAddressTests = Test<ModuleImmutableDataFixture>; using KernelBaseAddressTests = Test<ModuleImmutableDataFixture>;
TEST_F(KernelBaseAddressTests, whenQueryingKernelBaseAddressThenCorrectAddressIsReturned) { TEST_F(KernelBaseAddressTests, whenQueryingKernelBaseAddressThenCorrectAddressIsReturned) {
uint32_t perHwThreadPrivateMemorySizeRequested = 32u; uint32_t perHwThreadPrivateMemorySizeRequested = 32u;