Print warning when kernel uses too much SLM

Instead of just returning proper error code in case of exceeding
available Shared Local Memory size we also want to print error message
to make debugging easier.

Related-To: NEO-7280
Signed-off-by: Fabian Zwolinski <fabian.zwolinski@intel.com>
This commit is contained in:
Fabian Zwolinski 2022-10-07 10:44:43 +00:00 committed by Compute-Runtime-Automation
parent d4cddc7ecd
commit 7953d15826
15 changed files with 101 additions and 8 deletions

View File

@ -122,6 +122,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
auto localMemSize = static_cast<uint32_t>(neoDevice->getDeviceInfo().localMemSize);
auto slmTotalSize = kernelImp->getSlmTotalSize();
if (slmTotalSize > 0 && localMemSize < slmTotalSize) {
PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Size of SLM (%u) larger than available (%u)\n", slmTotalSize, localMemSize);
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}

View File

@ -244,6 +244,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
auto localMemSize = static_cast<uint32_t>(neoDevice->getDeviceInfo().localMemSize);
auto slmTotalSize = kernelImp->getSlmTotalSize();
if (slmTotalSize > 0 && localMemSize < slmTotalSize) {
PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Size of SLM (%u) larger than available (%u)\n", slmTotalSize, localMemSize);
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}

View File

@ -353,6 +353,7 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz
uint32_t localMemSize = (uint32_t)deviceInfo.localMemSize;
if (this->getSlmTotalSize() > 0 && localMemSize < this->getSlmTotalSize()) {
PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Size of SLM (%u) larger than available (%u)\n", this->getSlmTotalSize(), localMemSize);
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}

View File

@ -351,6 +351,8 @@ ze_result_t ModuleTranslationUnit::processUnpackedBinary() {
}
if (slmNeeded > slmAvailable) {
PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Size of SLM (%u) larger than available (%u)\n",
static_cast<uint32_t>(slmNeeded), static_cast<uint32_t>(slmAvailable));
return ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
}
@ -765,6 +767,7 @@ ze_result_t ModuleImp::createKernel(const ze_kernel_desc_t *desc,
for (const auto &kernelImmutableData : this->getKernelImmutableDataVector()) {
auto slmInlineSize = kernelImmutableData->getDescriptor().kernelAttributes.slmInlineSize;
if (slmInlineSize > 0 && localMemSize < slmInlineSize) {
PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Size of SLM (%u) larger than available (%u)\n", slmInlineSize, localMemSize);
res = ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
break;
}

View File

@ -124,7 +124,7 @@ int main(int argc, char *argv[]) {
[[maybe_unused]] auto result = fread(kernelOutput.get(), sizeof(char), sizeOfBuffer - 1, kernelOutputFile);
fclose(kernelOutputFile);
fflush(stdout);
// adjust/reatore stdout to previous descriptor
// adjust/restore stdout to previous descriptor
dup2(stdoutFd, fileno(stdout));
// close duplicate
close(stdoutFd);

View File

@ -1244,7 +1244,10 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenCooperativeAndNonCooperativeKernel
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result);
}
HWTEST2_F(CommandListAppendLaunchKernel, givenKernelWithSlmSizeExceedingLocalMemorySizeWhenAppendLaunchKernelWithParamsIsCalledThenOutOfDeviceMemoryIsReturned, IsAtLeastSkl) {
HWTEST2_F(CommandListAppendLaunchKernel, givenKernelWithSlmSizeExceedingLocalMemorySizeWhenAppendLaunchKernelWithParamsIsCalledThenDebugMsgErrIsPrintedAndOutOfDeviceMemoryIsReturned, IsAtLeastSkl) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.PrintDebugMessages.set(true);
Mock<::L0::Kernel> kernel;
std::unique_ptr<Module> pMockModule = std::make_unique<Mock<Module>>(device, nullptr);
kernel.module = pMockModule.get();
@ -1256,14 +1259,26 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelWithSlmSizeExceedingLocalMem
pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
CmdListKernelLaunchParams launchParams = {};
::testing::internal::CaptureStderr();
auto result = pCommandList->appendLaunchKernelWithParams(&kernel, &groupCount, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
std::string output = testing::internal::GetCapturedStderr();
EXPECT_EQ(std::string(""), output);
auto localMemSize = static_cast<uint32_t>(device->getNEODevice()->getDeviceInfo().localMemSize);
kernel.immutableData.kernelDescriptor->kernelAttributes.slmInlineSize = localMemSize + 10u;
::testing::internal::CaptureStderr();
result = pCommandList->appendLaunchKernelWithParams(&kernel, &groupCount, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, result);
output = testing::internal::GetCapturedStderr();
const auto &slmInlineSize = kernel.immutableData.kernelDescriptor->kernelAttributes.slmInlineSize;
std::string expectedOutput = "Size of SLM (" + std::to_string(slmInlineSize) + ") larger than available (" + std::to_string(localMemSize) + ")\n";
EXPECT_EQ(expectedOutput, output);
}
HWTEST2_F(CommandListAppendLaunchKernel, GivenDebugToggleSetWhenUpdateStreamPropertiesIsCalledThenCorrectThreadArbitrationPolicyIsSet, IsAtLeastSkl) {

View File

@ -179,7 +179,10 @@ TEST_P(KernelImpSuggestGroupSize, WhenSuggestingGroupThenProperGroupSizeChosen)
EXPECT_EQ(0U, size % groupSize[2]);
}
TEST_P(KernelImpSuggestGroupSize, WhenSlmSizeExceedsLocalMemorySizeAndSuggestingGroupSizeThenOutOfDeviceMemoryIsReturned) {
TEST_P(KernelImpSuggestGroupSize, WhenSlmSizeExceedsLocalMemorySizeAndSuggestingGroupSizeThenDebugMsgErrIsPrintedAndOutOfDeviceMemoryIsReturned) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.PrintDebugMessages.set(true);
WhiteBox<KernelImmutableData> funcInfo = {};
NEO::KernelDescriptor descriptor;
funcInfo.kernelDescriptor = &descriptor;
@ -196,11 +199,23 @@ TEST_P(KernelImpSuggestGroupSize, WhenSlmSizeExceedsLocalMemorySizeAndSuggesting
auto localMemSize = static_cast<uint32_t>(device->getNEODevice()->getDeviceInfo().localMemSize);
::testing::internal::CaptureStderr();
funcInfo.kernelDescriptor->kernelAttributes.slmInlineSize = localMemSize - 10u;
EXPECT_EQ(ZE_RESULT_SUCCESS, function.KernelImp::suggestGroupSize(size, 1, 1, groupSize, groupSize + 1, groupSize + 2));
std::string output = testing::internal::GetCapturedStderr();
EXPECT_EQ(std::string(""), output);
::testing::internal::CaptureStderr();
funcInfo.kernelDescriptor->kernelAttributes.slmInlineSize = localMemSize + 10u;
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, function.KernelImp::suggestGroupSize(size, 1, 1, groupSize, groupSize + 1, groupSize + 2));
output = testing::internal::GetCapturedStderr();
const auto &slmInlineSize = funcInfo.kernelDescriptor->kernelAttributes.slmInlineSize;
std::string expectedOutput = "Size of SLM (" + std::to_string(slmInlineSize) + ") larger than available (" + std::to_string(localMemSize) + ")\n";
EXPECT_EQ(expectedOutput, output);
}
TEST_F(KernelImp, GivenInvalidValuesWhenSettingGroupSizeThenInvalidArgumentErrorIsReturned) {

View File

@ -3046,7 +3046,10 @@ TEST_F(ModuleTests, givenImplicitArgsRelocationAndStackCallsWhenLinkingBuiltinMo
Kernel::fromHandle(kernelHandle)->destroy();
}
TEST_F(ModuleTests, givenFullyLinkedModuleAndSlmSizeExceedingLocalMemorySizeWhenCreatingKernelThenOutOfDeviceMemoryIsReturned) {
TEST_F(ModuleTests, givenFullyLinkedModuleAndSlmSizeExceedingLocalMemorySizeWhenCreatingKernelThenDebugMsgErrIsPrintedAndOutOfDeviceMemoryIsReturned) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.PrintDebugMessages.set(true);
auto pModule = std::make_unique<WhiteBox<Module>>(device, nullptr, ModuleType::Builtin);
pModule->maxGroupSize = 32;
@ -3057,6 +3060,7 @@ TEST_F(ModuleTests, givenFullyLinkedModuleAndSlmSizeExceedingLocalMemorySizeWhen
auto localMemSize = static_cast<uint32_t>(this->device->getNEODevice()->getDeviceInfo().localMemSize);
kernelInfo->kernelDescriptor.kernelAttributes.slmInlineSize = localMemSize + 10u;
auto slmInlineSizeCopy = kernelInfo->kernelDescriptor.kernelAttributes.slmInlineSize;
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
kernelImmData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, true);
@ -3071,6 +3075,8 @@ TEST_F(ModuleTests, givenFullyLinkedModuleAndSlmSizeExceedingLocalMemorySizeWhen
auto status = pModule->linkBinary();
EXPECT_TRUE(status);
::testing::internal::CaptureStderr();
ze_kernel_handle_t kernelHandle;
ze_kernel_desc_t kernelDesc = {};
@ -3079,6 +3085,11 @@ TEST_F(ModuleTests, givenFullyLinkedModuleAndSlmSizeExceedingLocalMemorySizeWhen
ze_result_t res = pModule->createKernel(&kernelDesc, &kernelHandle);
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, res);
std::string output = testing::internal::GetCapturedStderr();
std::string expectedOutput = "Size of SLM (" + std::to_string(slmInlineSizeCopy) + ") larger than available (" + std::to_string(localMemSize) + ")\n";
EXPECT_EQ(expectedOutput, output);
Kernel::fromHandle(kernelHandle)->destroy();
}

View File

@ -3512,6 +3512,7 @@ cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue commandQueue,
auto slmInlineSize = pKernel->getDescriptor().kernelAttributes.slmInlineSize;
if (slmInlineSize > 0 && localMemSize < slmInlineSize) {
PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Size of SLM (%u) larger than available (%u)\n", slmInlineSize, localMemSize);
retVal = CL_OUT_OF_RESOURCES;
TRACING_EXIT(ClEnqueueNdRangeKernel, &retVal);
return retVal;

View File

@ -105,6 +105,7 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
auto slmInlineSize = kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize;
if (slmInlineSize > 0 && localMemSize < slmInlineSize) {
PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Size of SLM (%u) larger than available (%u)\n", slmInlineSize, localMemSize);
retVal = CL_OUT_OF_RESOURCES;
}

View File

@ -216,6 +216,8 @@ cl_int Program::processProgramInfo(ProgramInfo &src, const ClDevice &clDevice) {
setLinkerInput(rootDeviceIndex, std::move(src.linkerInput));
if (slmNeeded > slmAvailable) {
PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Size of SLM (%u) larger than available (%u)\n",
static_cast<uint32_t>(slmNeeded), static_cast<uint32_t>(slmAvailable));
return CL_OUT_OF_RESOURCES;
}

View File

@ -39,7 +39,10 @@ TEST_F(clEnqueueNDRangeKernelTests, GivenValidParametersWhenExecutingKernelThenS
EXPECT_EQ(CL_SUCCESS, retVal);
}
TEST_F(clEnqueueNDRangeKernelTests, GivenKernelWithSlmSizeExceedingLocalMemorySizeWhenExecutingKernelThenOutOfResourcesIsReturned) {
TEST_F(clEnqueueNDRangeKernelTests, GivenKernelWithSlmSizeExceedingLocalMemorySizeWhenExecutingKernelThenDebugMsgErrIsPrintedAndOutOfResourcesIsReturned) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.PrintDebugMessages.set(true);
cl_uint workDim = 1;
size_t globalWorkOffset[3] = {0, 0, 0};
size_t globalWorkSize[3] = {1, 1, 1};
@ -48,6 +51,8 @@ TEST_F(clEnqueueNDRangeKernelTests, GivenKernelWithSlmSizeExceedingLocalMemorySi
cl_event *eventWaitList = nullptr;
cl_event *event = nullptr;
::testing::internal::CaptureStderr();
auto localMemSize = static_cast<uint32_t>(pDevice->getDevice().getDeviceInfo().localMemSize);
pProgram->mockKernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize = localMemSize - 10u;
@ -64,6 +69,11 @@ TEST_F(clEnqueueNDRangeKernelTests, GivenKernelWithSlmSizeExceedingLocalMemorySi
EXPECT_EQ(CL_SUCCESS, retVal);
std::string output = testing::internal::GetCapturedStderr();
EXPECT_EQ(std::string(""), output);
::testing::internal::CaptureStderr();
pProgram->mockKernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize = localMemSize + 10u;
retVal = clEnqueueNDRangeKernel(
pCommandQueue,
@ -77,6 +87,11 @@ TEST_F(clEnqueueNDRangeKernelTests, GivenKernelWithSlmSizeExceedingLocalMemorySi
event);
EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal);
output = testing::internal::GetCapturedStderr();
const auto &slmInlineSize = pProgram->mockKernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize;
std::string expectedOutput = "Size of SLM (" + std::to_string(slmInlineSize) + ") larger than available (" + std::to_string(localMemSize) + ")\n";
EXPECT_EQ(expectedOutput, output);
}
TEST_F(clEnqueueNDRangeKernelTests, GivenQueueIncapableWhenExecutingKernelThenInvalidOperationIsReturned) {

View File

@ -3005,10 +3005,15 @@ TEST_F(KernelCreateTest, whenInitFailedThenReturnNull) {
EXPECT_EQ(nullptr, ret);
}
TEST_F(KernelCreateTest, whenSlmSizeExceedsLocalMemorySizeThenReturnOutOfResources) {
TEST_F(KernelCreateTest, whenSlmSizeExceedsLocalMemorySizeThenDebugMsgErrIsPrintedAndOutOfResourcesIsReturned) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.PrintDebugMessages.set(true);
KernelInfo info{};
cl_int retVal{};
::testing::internal::CaptureStderr();
auto localMemSize = static_cast<uint32_t>(mockProgram.mDevice.getDevice().getDeviceInfo().localMemSize);
info.kernelDescriptor.kernelAttributes.slmInlineSize = localMemSize - 10u;
@ -3016,6 +3021,11 @@ TEST_F(KernelCreateTest, whenSlmSizeExceedsLocalMemorySizeThenReturnOutOfResourc
EXPECT_EQ(nullptr, ret);
EXPECT_NE(CL_OUT_OF_RESOURCES, retVal);
std::string output = testing::internal::GetCapturedStderr();
EXPECT_EQ(std::string(""), output);
::testing::internal::CaptureStderr();
retVal = 0;
info.kernelDescriptor.kernelAttributes.slmInlineSize = localMemSize + 10u;
@ -3023,6 +3033,11 @@ TEST_F(KernelCreateTest, whenSlmSizeExceedsLocalMemorySizeThenReturnOutOfResourc
EXPECT_EQ(nullptr, ret);
EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal);
output = testing::internal::GetCapturedStderr();
const auto &slmInlineSize = info.kernelDescriptor.kernelAttributes.slmInlineSize;
std::string expectedOutput = "Size of SLM (" + std::to_string(slmInlineSize) + ") larger than available (" + std::to_string(localMemSize) + ")\n";
EXPECT_EQ(expectedOutput, output);
ret = Kernel::create<MockKernel>(&mockProgram, info, mockProgram.mDevice, nullptr);
EXPECT_EQ(nullptr, ret);
}

View File

@ -2063,16 +2063,28 @@ TEST_F(ProgramTests, whenCreatingFromZebinThenAppendAllowZebinFlagToBuildOptions
EXPECT_STREQ(expectedOptions.c_str(), program->options.c_str());
}
TEST_F(ProgramTests, givenProgramFromGenBinaryWhenSLMSizeIsBiggerThenDeviceLimitThenReturnError) {
TEST_F(ProgramTests, givenProgramFromGenBinaryWhenSLMSizeIsBiggerThenDeviceLimitThenPrintDebugMsgAndReturnError) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.PrintDebugMessages.set(true);
PatchTokensTestData::ValidProgramWithKernelUsingSlm patchtokensProgram;
patchtokensProgram.slmMutable->TotalInlineLocalMemorySize = static_cast<uint32_t>(pDevice->getDeviceInfo().localMemSize * 2);
patchtokensProgram.recalcTokPtr();
auto program = std::make_unique<MockProgram>(nullptr, false, toClDeviceVector(*pClDevice));
program->buildInfos[rootDeviceIndex].unpackedDeviceBinary = makeCopy(patchtokensProgram.storage.data(), patchtokensProgram.storage.size());
program->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = patchtokensProgram.storage.size();
::testing::internal::CaptureStderr();
auto retVal = program->processGenBinary(*pClDevice);
EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal);
std::string output = testing::internal::GetCapturedStderr();
const auto &slmInlineSize = patchtokensProgram.slmMutable->TotalInlineLocalMemorySize;
const auto &localMemSize = pDevice->getDeviceInfo().localMemSize;
std::string expectedOutput = "Size of SLM (" + std::to_string(slmInlineSize) + ") larger than available (" + std::to_string(localMemSize) + ")\n";
EXPECT_EQ(expectedOutput, output);
}
TEST_F(ProgramTests, givenExistingConstantSurfacesWhenProcessGenBinaryThenCleanupTheSurfaceOnlyForSpecificDevice) {

View File

@ -62,7 +62,7 @@ void WorkSizeInfo::setMinWorkGroupSize(const HardwareInfo *hwInfo, bool disableE
}
if (slmTotalSize > 0) {
if (localMemSize < slmTotalSize) {
PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Size of SLM (%d) larger than available (%d)\n", slmTotalSize, localMemSize);
PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Size of SLM (%u) larger than available (%u)\n", slmTotalSize, localMemSize);
}
UNRECOVERABLE_IF(localMemSize < slmTotalSize);
minWorkGroupSize = std::max(maxWorkGroupSize / ((localMemSize / slmTotalSize)), minWorkGroupSize);