diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index eb8090e31d..3e7cf740a8 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -119,6 +119,12 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K NEO::Device *neoDevice = device->getNEODevice(); + auto localMemSize = static_cast(neoDevice->getDeviceInfo().localMemSize); + auto slmTotalSize = kernelImp->getSlmTotalSize(); + if (slmTotalSize > 0 && localMemSize < slmTotalSize) { + return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; + } + if (NEO::DebugManager.flags.EnableSWTags.get()) { neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( *commandContainer.getCommandStream(), diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index dde885db4e..5ae432a9a8 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -241,6 +241,12 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs(); this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs(); + auto localMemSize = static_cast(neoDevice->getDeviceInfo().localMemSize); + auto slmTotalSize = kernelImp->getSlmTotalSize(); + if (slmTotalSize > 0 && localMemSize < slmTotalSize) { + return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; + } + std::list additionalCommands; NEO::EncodeDispatchKernelArgs dispatchKernelArgs{ diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index 6fae1d275d..35b7343255 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -352,6 +352,10 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz uint32_t numThreadsPerSubSlice = (uint32_t)deviceInfo.maxNumEUsPerSubSlice * deviceInfo.numThreadsPerEU; uint32_t localMemSize = (uint32_t)deviceInfo.localMemSize; + if (this->getSlmTotalSize() > 0 && localMemSize < this->getSlmTotalSize()) { + return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; + } + NEO::WorkSizeInfo wsInfo(maxWorkGroupSize, kernelImmData->getDescriptor().kernelAttributes.usesBarriers(), simd, this->getSlmTotalSize(), hwInfo, numThreadsPerSubSlice, localMemSize, usesImages, false, kernelImmData->getDescriptor().kernelAttributes.flags.requiresDisabledEUFusion); diff --git a/level_zero/core/source/module/module_imp.cpp b/level_zero/core/source/module/module_imp.cpp index 0b386b35c4..5cf140f535 100644 --- a/level_zero/core/source/module/module_imp.cpp +++ b/level_zero/core/source/module/module_imp.cpp @@ -759,6 +759,16 @@ ze_result_t ModuleImp::createKernel(const ze_kernel_desc_t *desc, *kernelHandle = kernel->toHandle(); } + auto localMemSize = static_cast(this->getDevice()->getNEODevice()->getDeviceInfo().localMemSize); + + for (const auto &kernelImmutableData : this->getKernelImmutableDataVector()) { + auto slmInlineSize = kernelImmutableData->getDescriptor().kernelAttributes.slmInlineSize; + if (slmInlineSize > 0 && localMemSize < slmInlineSize) { + res = ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; + break; + } + } + return res; } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp index b3cf5c11da..83b970a1f0 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp @@ -1218,7 +1218,7 @@ HWTEST_F(CmdlistAppendLaunchKernelTests, whenEncodingWorkDimForIndirectDispatchT using CommandListAppendLaunchKernel = Test; HWTEST2_F(CommandListAppendLaunchKernel, givenCooperativeAndNonCooperativeKernelsWhenAppendLaunchCooperativeKernelIsCalledThenReturnError, IsAtLeastSkl) { Mock<::L0::Kernel> kernel; - auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); + std::unique_ptr pMockModule = std::make_unique>(device, nullptr); kernel.module = pMockModule.get(); kernel.setGroupSize(4, 1, 1); @@ -1244,6 +1244,28 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenCooperativeAndNonCooperativeKernel EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); } +HWTEST2_F(CommandListAppendLaunchKernel, givenKernelWithSlmSizeExceedingLocalMemorySizeWhenAppendLaunchKernelWithParamsIsCalledThenOutOfDeviceMemoryIsReturned, IsAtLeastSkl) { + Mock<::L0::Kernel> kernel; + std::unique_ptr pMockModule = std::make_unique>(device, nullptr); + kernel.module = pMockModule.get(); + + kernel.setGroupSize(4, 1, 1); + ze_group_count_t groupCount{8, 1, 1}; + + auto pCommandList = std::make_unique>>(); + pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); + CmdListKernelLaunchParams launchParams = {}; + + auto result = pCommandList->appendLaunchKernelWithParams(&kernel, &groupCount, nullptr, launchParams); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto localMemSize = static_cast(device->getNEODevice()->getDeviceInfo().localMemSize); + kernel.immutableData.kernelDescriptor->kernelAttributes.slmInlineSize = localMemSize + 10u; + + result = pCommandList->appendLaunchKernelWithParams(&kernel, &groupCount, nullptr, launchParams); + EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, result); +} + HWTEST2_F(CommandListAppendLaunchKernel, GivenDebugToggleSetWhenUpdateStreamPropertiesIsCalledThenCorrectThreadArbitrationPolicyIsSet, IsAtLeastSkl) { DebugManagerStateRestore restorer; DebugManager.flags.ForceThreadArbitrationPolicyProgrammingWithScm.set(1); @@ -1253,7 +1275,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenDebugToggleSetWhenUpdateStreamProp auto nonDefaultThreadArbitrationPolicy = defaultThreadArbitrationPolicy + 1; Mock<::L0::Kernel> kernel; - auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); + std::unique_ptr pMockModule = std::make_unique>(device, nullptr); kernel.module = pMockModule.get(); auto pCommandList = std::make_unique>>(); diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel_2.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel_2.cpp index 20343545b7..51324d32d4 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel_2.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel_2.cpp @@ -179,6 +179,30 @@ TEST_P(KernelImpSuggestGroupSize, WhenSuggestingGroupThenProperGroupSizeChosen) EXPECT_EQ(0U, size % groupSize[2]); } +TEST_P(KernelImpSuggestGroupSize, WhenSlmSizeExceedsLocalMemorySizeAndSuggestingGroupSizeThenOutOfDeviceMemoryIsReturned) { + WhiteBox funcInfo = {}; + NEO::KernelDescriptor descriptor; + funcInfo.kernelDescriptor = &descriptor; + + Mock module(device, nullptr); + + uint32_t size = GetParam(); + + Mock function; + function.kernelImmData = &funcInfo; + function.module = &module; + uint32_t groupSize[3]; + EXPECT_EQ(ZE_RESULT_SUCCESS, function.KernelImp::suggestGroupSize(size, 1, 1, groupSize, groupSize + 1, groupSize + 2)); + + auto localMemSize = static_cast(device->getNEODevice()->getDeviceInfo().localMemSize); + + funcInfo.kernelDescriptor->kernelAttributes.slmInlineSize = localMemSize - 10u; + EXPECT_EQ(ZE_RESULT_SUCCESS, function.KernelImp::suggestGroupSize(size, 1, 1, groupSize, groupSize + 1, groupSize + 2)); + + funcInfo.kernelDescriptor->kernelAttributes.slmInlineSize = localMemSize + 10u; + EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, function.KernelImp::suggestGroupSize(size, 1, 1, groupSize, groupSize + 1, groupSize + 2)); +} + TEST_F(KernelImp, GivenInvalidValuesWhenSettingGroupSizeThenInvalidArgumentErrorIsReturned) { Mock kernel; EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, kernel.KernelImp::setGroupSize(0U, 1U, 1U)); diff --git a/level_zero/core/test/unit_tests/sources/module/test_module.cpp b/level_zero/core/test/unit_tests/sources/module/test_module.cpp index cbedff0901..6e59839cf8 100644 --- a/level_zero/core/test/unit_tests/sources/module/test_module.cpp +++ b/level_zero/core/test/unit_tests/sources/module/test_module.cpp @@ -3011,6 +3011,42 @@ TEST_F(ModuleTests, givenImplicitArgsRelocationAndStackCallsWhenLinkingBuiltinMo Kernel::fromHandle(kernelHandle)->destroy(); } +TEST_F(ModuleTests, givenFullyLinkedModuleAndSlmSizeExceedingLocalMemorySizeWhenCreatingKernelThenOutOfDeviceMemoryIsReturned) { + auto pModule = std::make_unique>(device, nullptr, ModuleType::Builtin); + pModule->maxGroupSize = 32; + + char data[64]{}; + std::unique_ptr kernelInfo = std::make_unique(); + kernelInfo->heapInfo.KernelHeapSize = 64; + kernelInfo->heapInfo.pKernelHeap = data; + + auto localMemSize = static_cast(this->device->getNEODevice()->getDeviceInfo().localMemSize); + kernelInfo->kernelDescriptor.kernelAttributes.slmInlineSize = localMemSize + 10u; + + std::unique_ptr> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)}; + kernelImmData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, true); + + pModule->kernelImmDatas.push_back(std::move(kernelImmData)); + pModule->translationUnit->programInfo.kernelInfos.push_back(kernelInfo.release()); + auto linkerInput = std::make_unique<::WhiteBox>(); + linkerInput->traits.requiresPatchingOfInstructionSegments = true; + linkerInput->textRelocations.push_back({{implicitArgsRelocationSymbolName, 0x8, LinkerInput::RelocationInfo::Type::AddressLow, SegmentType::Instructions}}); + pModule->translationUnit->programInfo.linkerInput = std::move(linkerInput); + + auto status = pModule->linkBinary(); + EXPECT_TRUE(status); + + ze_kernel_handle_t kernelHandle; + + ze_kernel_desc_t kernelDesc = {}; + kernelDesc.pKernelName = pModule->translationUnit->programInfo.kernelInfos[0]->kernelDescriptor.kernelMetadata.kernelName.c_str(); + + ze_result_t res = pModule->createKernel(&kernelDesc, &kernelHandle); + + EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, res); + Kernel::fromHandle(kernelHandle)->destroy(); +} + TEST_F(ModuleTests, givenImplicitArgsRelocationAndStackCallsWhenLinkingModuleThenSegmentIsPatchedAndImplicitArgsAreRequired) { auto pModule = std::make_unique(device, nullptr, ModuleType::User); diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index 5393e4c08a..1763e8a106 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -3507,6 +3507,16 @@ cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue commandQueue, } Kernel *pKernel = pMultiDeviceKernel->getKernel(pCommandQueue->getDevice().getRootDeviceIndex()); + + auto localMemSize = static_cast(pCommandQueue->getDevice().getDeviceInfo().localMemSize); + auto slmInlineSize = pKernel->getDescriptor().kernelAttributes.slmInlineSize; + + if (slmInlineSize > 0 && localMemSize < slmInlineSize) { + retVal = CL_OUT_OF_RESOURCES; + TRACING_EXIT(ClEnqueueNdRangeKernel, &retVal); + return retVal; + } + if ((pKernel->getExecutionType() != KernelExecutionType::Default) || pKernel->usesSyncBuffer()) { retVal = CL_INVALID_KERNEL; diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index 4111c7588d..a0aa4c6101 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -101,6 +101,13 @@ class Kernel : public ReferenceTrackedObject { pKernel = nullptr; } + auto localMemSize = static_cast(clDevice.getDevice().getDeviceInfo().localMemSize); + auto slmInlineSize = kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize; + + if (slmInlineSize > 0 && localMemSize < slmInlineSize) { + retVal = CL_OUT_OF_RESOURCES; + } + if (errcodeRet) { *errcodeRet = retVal; } diff --git a/opencl/test/unit_test/api/cl_enqueue_nd_range_kernel_tests.inl b/opencl/test/unit_test/api/cl_enqueue_nd_range_kernel_tests.inl index 66d5426571..c711fc6f73 100644 --- a/opencl/test/unit_test/api/cl_enqueue_nd_range_kernel_tests.inl +++ b/opencl/test/unit_test/api/cl_enqueue_nd_range_kernel_tests.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -39,6 +39,46 @@ TEST_F(clEnqueueNDRangeKernelTests, GivenValidParametersWhenExecutingKernelThenS EXPECT_EQ(CL_SUCCESS, retVal); } +TEST_F(clEnqueueNDRangeKernelTests, GivenKernelWithSlmSizeExceedingLocalMemorySizeWhenExecutingKernelThenOutOfResourcesIsReturned) { + cl_uint workDim = 1; + size_t globalWorkOffset[3] = {0, 0, 0}; + size_t globalWorkSize[3] = {1, 1, 1}; + size_t localWorkSize[3] = {1, 1, 1}; + cl_uint numEventsInWaitList = 0; + cl_event *eventWaitList = nullptr; + cl_event *event = nullptr; + + auto localMemSize = static_cast(pDevice->getDevice().getDeviceInfo().localMemSize); + + pProgram->mockKernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize = localMemSize - 10u; + retVal = clEnqueueNDRangeKernel( + pCommandQueue, + pMultiDeviceKernel, + workDim, + globalWorkOffset, + globalWorkSize, + localWorkSize, + numEventsInWaitList, + eventWaitList, + event); + + EXPECT_EQ(CL_SUCCESS, retVal); + + pProgram->mockKernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize = localMemSize + 10u; + retVal = clEnqueueNDRangeKernel( + pCommandQueue, + pMultiDeviceKernel, + workDim, + globalWorkOffset, + globalWorkSize, + localWorkSize, + numEventsInWaitList, + eventWaitList, + event); + + EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); +} + TEST_F(clEnqueueNDRangeKernelTests, GivenQueueIncapableWhenExecutingKernelThenInvalidOperationIsReturned) { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 2ffb5d4ac5..dcefda6651 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -2977,7 +2977,8 @@ TEST_F(KernelMultiRootDeviceTest, givenKernelWithPrivateSurfaceWhenInitializeThe } } -TEST(KernelCreateTest, whenInitFailedThenReturnNull) { +class KernelCreateTest : public ::testing::Test { + protected: struct MockProgram { ClDeviceVector getDevices() { ClDeviceVector deviceVector; @@ -2986,19 +2987,46 @@ TEST(KernelCreateTest, whenInitFailedThenReturnNull) { } void getSource(std::string &) {} MockClDevice mDevice{new MockDevice}; - } mockProgram; + }; + struct MockKernel { MockKernel(MockProgram *, const KernelInfo &, ClDevice &) {} int initialize() { return -1; }; }; - KernelInfo info; + MockProgram mockProgram{}; +}; + +TEST_F(KernelCreateTest, whenInitFailedThenReturnNull) { + KernelInfo info{}; info.kernelDescriptor.kernelAttributes.gpuPointerSize = 8; auto ret = Kernel::create(&mockProgram, info, mockProgram.mDevice, nullptr); EXPECT_EQ(nullptr, ret); } +TEST_F(KernelCreateTest, whenSlmSizeExceedsLocalMemorySizeThenReturnOutOfResources) { + KernelInfo info{}; + cl_int retVal{}; + + auto localMemSize = static_cast(mockProgram.mDevice.getDevice().getDeviceInfo().localMemSize); + + info.kernelDescriptor.kernelAttributes.slmInlineSize = localMemSize - 10u; + auto ret = Kernel::create(&mockProgram, info, mockProgram.mDevice, &retVal); + EXPECT_EQ(nullptr, ret); + EXPECT_NE(CL_OUT_OF_RESOURCES, retVal); + + retVal = 0; + + info.kernelDescriptor.kernelAttributes.slmInlineSize = localMemSize + 10u; + ret = Kernel::create(&mockProgram, info, mockProgram.mDevice, &retVal); + EXPECT_EQ(nullptr, ret); + EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); + + ret = Kernel::create(&mockProgram, info, mockProgram.mDevice, nullptr); + EXPECT_EQ(nullptr, ret); +} + TEST(MultiDeviceKernelCreateTest, whenInitFailedThenReturnNullAndPropagateErrorCode) { MockContext context; auto pKernelInfo = std::make_unique();