From e0ccf2255700c6827596b8123cdf0ecdb66d2a04 Mon Sep 17 00:00:00 2001 From: Dominik Dabek Date: Thu, 18 Jan 2024 13:23:34 +0000 Subject: [PATCH] fix: indirect access in external functions Read indirect_stateless_count in module external functions. If greater than 0, mark all kernels that have the has_stack_calls flag set from this module as having indirect accesses. Related-To: NEO-7712 Signed-off-by: Dominik Dabek --- level_zero/core/source/kernel/kernel_imp.cpp | 1 + .../unit_tests/sources/kernel/test_kernel.cpp | 42 +++++++++++++ opencl/source/kernel/kernel.cpp | 1 + .../source/program/process_device_binary.cpp | 1 + opencl/source/program/program.h | 2 + opencl/test/unit_test/kernel/kernel_tests.cpp | 62 ++++++++++++++++++- opencl/test/unit_test/mocks/mock_program.h | 3 +- .../zebin/zeinfo_decoder.cpp | 4 ++ shared/source/program/program_info.h | 1 + shared/test/common/mocks/mock_modules_zebin.h | 3 +- .../device_binary_formats_tests.cpp | 3 +- .../zebin_decoder_tests.cpp | 4 ++ 12 files changed, 123 insertions(+), 4 deletions(-) diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index de421a7b1b..7a59a46870 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -1109,6 +1109,7 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) { kernelDescriptor.kernelAttributes.hasNonKernelArgStore || kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic || kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess || + (moduleImp->getTranslationUnit()->programInfo.functionPointerWithIndirectAccessExists && kernelDescriptor.kernelAttributes.flags.useStackCalls) || NEO::KernelHelper::isAnyArgumentPtrByValue(kernelDescriptor); } else { kernelHasIndirectAccess = true; diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp index e81d411280..69afe3d8a7 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp @@ -1387,6 +1387,7 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, givenDetectIndirectAccessInKernelEn module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false; module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false; module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = false; + module->getTranslationUnit()->programInfo.functionPointerWithIndirectAccessExists = false; kernel->initialize(&desc); @@ -1404,6 +1405,7 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, givenDetectIndirectAccessInKernelEn module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = true; module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false; module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = false; + module->getTranslationUnit()->programInfo.functionPointerWithIndirectAccessExists = false; kernel->initialize(&desc); @@ -1421,6 +1423,7 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, givenDetectIndirectAccessInKernelEn module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false; module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = true; module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = false; + module->getTranslationUnit()->programInfo.functionPointerWithIndirectAccessExists = false; kernel->initialize(&desc); @@ -1438,6 +1441,45 @@ TEST_F(KernelIndirectPropertiesFromIGCTests, givenDetectIndirectAccessInKernelEn module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false; module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false; module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = true; + module->getTranslationUnit()->programInfo.functionPointerWithIndirectAccessExists = false; + + kernel->initialize(&desc); + + EXPECT_TRUE(kernel->hasIndirectAccess()); + } + + { + std::unique_ptr kernel; + kernel = std::make_unique(module.get()); + + ze_kernel_desc_t desc = {}; + desc.pKernelName = kernelName.c_str(); + + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false; + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false; + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false; + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = false; + module->getTranslationUnit()->programInfo.functionPointerWithIndirectAccessExists = true; + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.flags.useStackCalls = false; + + kernel->initialize(&desc); + + EXPECT_FALSE(kernel->hasIndirectAccess()); + } + + { + std::unique_ptr kernel; + kernel = std::make_unique(module.get()); + + ze_kernel_desc_t desc = {}; + desc.pKernelName = kernelName.c_str(); + + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false; + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false; + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false; + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasIndirectStatelessAccess = false; + module->getTranslationUnit()->programInfo.functionPointerWithIndirectAccessExists = true; + module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.flags.useStackCalls = true; kernel->initialize(&desc); diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 8c49a92011..759a01f8a6 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -296,6 +296,7 @@ cl_int Kernel::initialize() { kernelDescriptor.kernelAttributes.hasNonKernelArgStore || kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic || kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess || + (program->getFunctionPointerWithIndirectAccessExists() && kernelDescriptor.kernelAttributes.flags.useStackCalls) || NEO::KernelHelper::isAnyArgumentPtrByValue(kernelDescriptor); } else { this->kernelHasIndirectAccess = true; diff --git a/opencl/source/program/process_device_binary.cpp b/opencl/source/program/process_device_binary.cpp index 909dab1719..f96b582506 100644 --- a/opencl/source/program/process_device_binary.cpp +++ b/opencl/source/program/process_device_binary.cpp @@ -289,6 +289,7 @@ cl_int Program::processProgramInfo(ProgramInfo &src, const ClDevice &clDevice) { } indirectDetectionVersion = src.indirectDetectionVersion; + functionPointerWithIndirectAccessExists = src.functionPointerWithIndirectAccessExists; return linkBinary(&clDevice.getDevice(), src.globalConstants.initData, src.globalConstants.size, src.globalVariables.initData, src.globalVariables.size, src.globalStrings, src.externalFunctions); diff --git a/opencl/source/program/program.h b/opencl/source/program/program.h index db36eadaf8..df49701386 100644 --- a/opencl/source/program/program.h +++ b/opencl/source/program/program.h @@ -247,6 +247,7 @@ class Program : public BaseObject<_cl_program> { MOCKABLE_VIRTUAL std::string getInternalOptions() const; uint32_t getMaxRootDeviceIndex() const { return maxRootDeviceIndex; } uint32_t getIndirectDetectionVersion() const { return indirectDetectionVersion; } + bool getFunctionPointerWithIndirectAccessExists() const { return functionPointerWithIndirectAccessExists; } void retainForKernel() { std::unique_lock lock{lockMutex}; exposedKernels++; @@ -372,6 +373,7 @@ class Program : public BaseObject<_cl_program> { ClDeviceVector clDevicesInProgram; uint32_t indirectDetectionVersion = 0u; + bool functionPointerWithIndirectAccessExists = false; bool isBuiltIn = false; bool isGeneratedByIgc = true; diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index a13d7a1fd9..da3ca96a60 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -1776,6 +1776,66 @@ HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNor memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } +HWTEST_F(KernelResidencyTest, givenKernelWithExternalFunctionWithIndirectAccessAndDetectIndirectAccessInKernelEnabledThenKernelHasIndirectAccessIsSetToTrue) { + DebugManagerStateRestore restorer; + debugManager.flags.DetectIndirectAccessInKernel.set(1); + auto pKernelInfo = std::make_unique(); + pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; + pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false; + pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false; + pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false; + pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = false; + pKernelInfo->kernelDescriptor.kernelAttributes.flags.useStackCalls = true; + + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.storeMakeResidentAllocations = true; + + auto memoryManager = commandStreamReceiver.getMemoryManager(); + pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); + + MockProgram program(toClDeviceVector(*pClDevice)); + MockContext ctx; + program.setContext(&ctx); + program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); + program.functionPointerWithIndirectAccessExists = true; + std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); + + EXPECT_TRUE(kernel->getHasIndirectAccess()); + + memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); +} + +HWTEST_F(KernelResidencyTest, givenKernelWithExternalFunctionWithIndirectAccessButNoUseStackCallsAndDetectIndirectAccessInKernelEnabledThenKernelHasIndirectAccessIsSetToFalse) { + DebugManagerStateRestore restorer; + debugManager.flags.DetectIndirectAccessInKernel.set(1); + auto pKernelInfo = std::make_unique(); + pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; + pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false; + pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false; + pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false; + pKernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = false; + pKernelInfo->kernelDescriptor.kernelAttributes.flags.useStackCalls = false; + + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.storeMakeResidentAllocations = true; + + auto memoryManager = commandStreamReceiver.getMemoryManager(); + pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); + + MockProgram program(toClDeviceVector(*pClDevice)); + MockContext ctx; + program.setContext(&ctx); + program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); + program.functionPointerWithIndirectAccessExists = true; + std::unique_ptr kernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); + ASSERT_EQ(CL_SUCCESS, kernel->initialize()); + + EXPECT_FALSE(kernel->getHasIndirectAccess()); + + memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); +} + HWTEST_F(KernelResidencyTest, givenKernelWithPtrByValueArgumentAndDetectIndirectAccessInKernelEnabledThenKernelHasIndirectAccessIsSetToTrue) { DebugManagerStateRestore restorer; debugManager.flags.DetectIndirectAccessInKernel.set(1); diff --git a/opencl/test/unit_test/mocks/mock_program.h b/opencl/test/unit_test/mocks/mock_program.h index 30a0699468..3f9fe153ee 100644 --- a/opencl/test/unit_test/mocks/mock_program.h +++ b/opencl/test/unit_test/mocks/mock_program.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -55,6 +55,7 @@ class MockProgram : public Program { using Program::deviceBuildInfos; using Program::disableZebinIfVmeEnabled; using Program::extractInternalOptions; + using Program::functionPointerWithIndirectAccessExists; using Program::getKernelInfo; using Program::getModuleAllocations; using Program::internalOptionsToExtract; diff --git a/shared/source/device_binary_format/zebin/zeinfo_decoder.cpp b/shared/source/device_binary_format/zebin/zeinfo_decoder.cpp index e3f3dc8d6a..1bd00862a4 100644 --- a/shared/source/device_binary_format/zebin/zeinfo_decoder.cpp +++ b/shared/source/device_binary_format/zebin/zeinfo_decoder.cpp @@ -9,6 +9,7 @@ #include "shared/source/compiler_interface/external_functions.h" #include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/device_binary_format/zebin/zebin_elf.h" #include "shared/source/device_binary_format/zebin/zeinfo_enum_lookup.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/basic_math.h" @@ -494,6 +495,9 @@ DecodeError decodeZeInfoKernels(ProgramInfo &dst, Yaml::YamlParser &parser, cons if (DecodeError::success != zeInfoErr) { return zeInfoErr; } + if (kernelInfo->kernelDescriptor.kernelMetadata.kernelName == Zebin::Elf::SectionNames::externalFunctions) { + dst.functionPointerWithIndirectAccessExists |= kernelInfo->kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess; + } dst.kernelInfos.push_back(kernelInfo.release()); } diff --git a/shared/source/program/program_info.h b/shared/source/program/program_info.h index afc906a79d..713da6722a 100644 --- a/shared/source/program/program_info.h +++ b/shared/source/program/program_info.h @@ -48,6 +48,7 @@ struct ProgramInfo { uint32_t minScratchSpaceSize = 0U; uint32_t indirectDetectionVersion = 0U; size_t kernelMiscInfoPos = std::string::npos; + bool functionPointerWithIndirectAccessExists = false; }; size_t getMaxInlineSlmNeeded(const ProgramInfo &programInfo); diff --git a/shared/test/common/mocks/mock_modules_zebin.h b/shared/test/common/mocks/mock_modules_zebin.h index e8a1b69e2d..0ea0dbd711 100644 --- a/shared/test/common/mocks/mock_modules_zebin.h +++ b/shared/test/common/mocks/mock_modules_zebin.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -69,6 +69,7 @@ kernels: - name: Intel_Symbol_Table_Void_Program execution_env: simd_size: 8 + indirect_stateless_count: 1 functions: - name: fun0 execution_env: diff --git a/shared/test/unit_test/device_binary_format/device_binary_formats_tests.cpp b/shared/test/unit_test/device_binary_format/device_binary_formats_tests.cpp index 8c86bbd130..ac3762f602 100644 --- a/shared/test/unit_test/device_binary_format/device_binary_formats_tests.cpp +++ b/shared/test/unit_test/device_binary_format/device_binary_formats_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -343,6 +343,7 @@ TEST(DecodeSingleDeviceBinary, GivenZebinWithExternalFunctionsThenDecodingSuccee EXPECT_TRUE(decodeErrors.empty()) << decodeErrors; EXPECT_NE(nullptr, programInfo.linkerInput.get()); EXPECT_EQ(1, programInfo.linkerInput->getExportedFunctionsSegmentId()); + EXPECT_TRUE(programInfo.functionPointerWithIndirectAccessExists); } TEST(DecodeSingleDeviceBinary, GivenOclElfFormatThenDecodingFails) { diff --git a/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp b/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp index 97b9ed5624..0beae2a534 100644 --- a/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp +++ b/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp @@ -6532,6 +6532,8 @@ functions: EXPECT_EQ(128U, fun1Info.numGrfRequired); EXPECT_EQ(8U, fun1Info.simdSize); EXPECT_EQ(0U, fun1Info.barrierCount); + + EXPECT_FALSE(programInfo.functionPointerWithIndirectAccessExists); } } @@ -6572,6 +6574,8 @@ functions: EXPECT_EQ(128U, fun0Info.numGrfRequired); EXPECT_EQ(8U, fun0Info.simdSize); EXPECT_EQ(1U, fun0Info.barrierCount); + + EXPECT_FALSE(programInfo.functionPointerWithIndirectAccessExists); } TEST(PopulateZeInfoExternalFunctionsMetadata, GivenInvalidExternalFunctionsMetadataThenFail) {