diff --git a/level_zero/core/source/module/module_imp.cpp b/level_zero/core/source/module/module_imp.cpp index d3899fc1ad..526dfad81d 100644 --- a/level_zero/core/source/module/module_imp.cpp +++ b/level_zero/core/source/module/module_imp.cpp @@ -15,6 +15,7 @@ #include "shared/source/device_binary_format/elf/elf.h" #include "shared/source/device_binary_format/elf/elf_encoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" +#include "shared/source/helpers/addressing_mode_helper.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/constants.h" #include "shared/source/helpers/kernel_helpers.h" @@ -41,6 +42,7 @@ namespace BuildOptions { NEO::ConstStringRef optDisable = "-ze-opt-disable"; NEO::ConstStringRef optLevel = "-ze-opt-level"; NEO::ConstStringRef greaterThan4GbRequired = "-ze-opt-greater-than-4GB-buffer-required"; +NEO::ConstStringRef smallerThan4GbBuffersOnly = "-ze-opt-smaller-than-4GB-buffers-only"; NEO::ConstStringRef hasBufferOffsetArg = "-ze-intel-has-buffer-offset-arg"; NEO::ConstStringRef debugKernelEnable = "-ze-kernel-debug-enable"; } // namespace BuildOptions @@ -122,8 +124,8 @@ std::string ModuleTranslationUnit::generateCompilerOptions(const char *buildOpti internalOptions = NEO::CompilerOptions::concatenate(internalOptions, BuildOptions::debugKernelEnable); } - if (NEO::DebugManager.flags.DisableStatelessToStatefulOptimization.get() || - device->getNEODevice()->areSharedSystemAllocationsAllowed()) { + auto sharedSystemAllocationsAllowed = device->getNEODevice()->areSharedSystemAllocationsAllowed(); + if (NEO::DebugManager.flags.DisableStatelessToStatefulOptimization.get() || NEO::AddressingModeHelper::forceToStatelessNeeded(options, BuildOptions::smallerThan4GbBuffersOnly.str(), sharedSystemAllocationsAllowed)) { internalOptions = NEO::CompilerOptions::concatenate(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired); } @@ -529,6 +531,12 @@ bool ModuleImp::initialize(const ze_module_desc_t *desc, NEO::Device *neoDevice) this->updateBuildLog(neoDevice); verifyDebugCapabilities(); + auto isUserKernel = (this->type == ModuleType::User); + auto sharedSystemAllocationsAllowed = device->getNEODevice()->areSharedSystemAllocationsAllowed(); + if (NEO::AddressingModeHelper::containsStatefulAccess(this->translationUnit->programInfo.kernelInfos) && NEO::AddressingModeHelper::forceToStatelessNeeded(this->translationUnit->options, BuildOptions::smallerThan4GbBuffersOnly.str(), sharedSystemAllocationsAllowed) && isUserKernel) { + success = false; + } + if (false == success) { return false; } diff --git a/level_zero/core/test/unit_tests/sources/module/test_module.cpp b/level_zero/core/test/unit_tests/sources/module/test_module.cpp index f36b46bb07..67a9cbe467 100644 --- a/level_zero/core/test/unit_tests/sources/module/test_module.cpp +++ b/level_zero/core/test/unit_tests/sources/module/test_module.cpp @@ -8,6 +8,7 @@ #include "shared/source/device_binary_format/debug_zebin.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" +#include "shared/source/helpers/addressing_mode_helper.h" #include "shared/source/kernel/implicit_args.h" #include "shared/source/program/kernel_info.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" @@ -1815,6 +1816,150 @@ TEST_F(ModuleTest, GivenInjectInternalBuildOptionsWhenBuildingBuiltinModuleThenI EXPECT_FALSE(CompilerOptions::contains(cip->buildInternalOptions, "-abc")); }; +TEST_F(ModuleTest, givenSharedSystemAllocationsSupportWhenGenerateCompilerOptionsThenOptionsAreCorrect) { + auto areSharedSystemAllocationsSupported = device->getNEODevice()->areSharedSystemAllocationsAllowed(); + if (!areSharedSystemAllocationsSupported) { + GTEST_SKIP(); + } + + DebugManagerStateRestore restorer; + auto module = std::make_unique(device, nullptr, ModuleType::User); + ASSERT_NE(nullptr, module); + auto moduleTranslationUnit = module->getTranslationUnit(); + ASSERT_NE(nullptr, moduleTranslationUnit); + std::string buildOptions; + std::string internalBuildOptions; + + { + NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(-1); + buildOptions = ""; + auto internalOptions = moduleTranslationUnit->generateCompilerOptions(buildOptions.c_str(), internalBuildOptions.c_str()); + EXPECT_THAT(internalOptions, testing::HasSubstr("-cl-intel-greater-than-4GB-buffer-required")); + } + { + NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(-1); + buildOptions = "-ze-opt-smaller-than-4GB-buffers-only"; + auto internalOptions = moduleTranslationUnit->generateCompilerOptions(buildOptions.c_str(), internalBuildOptions.c_str()); + EXPECT_THAT(internalOptions, testing::Not(testing::HasSubstr("-cl-intel-greater-than-4GB-buffer-required"))); + } + { + NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0); + buildOptions = ""; + auto internalOptions = moduleTranslationUnit->generateCompilerOptions(buildOptions.c_str(), internalBuildOptions.c_str()); + EXPECT_THAT(internalOptions, testing::HasSubstr("-cl-intel-greater-than-4GB-buffer-required")); + } + { + NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0); + buildOptions = "-ze-opt-smaller-than-4GB-buffers-only"; + auto internalOptions = moduleTranslationUnit->generateCompilerOptions(buildOptions.c_str(), internalBuildOptions.c_str()); + EXPECT_THAT(internalOptions, testing::HasSubstr("-cl-intel-greater-than-4GB-buffer-required")); + } + { + NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(1); + buildOptions = ""; + auto internalOptions = moduleTranslationUnit->generateCompilerOptions(buildOptions.c_str(), internalBuildOptions.c_str()); + EXPECT_THAT(internalOptions, testing::Not(testing::HasSubstr("-cl-intel-greater-than-4GB-buffer-required"))); + } + { + NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(1); + buildOptions = "-ze-opt-smaller-than-4GB-buffers-only"; + auto internalOptions = moduleTranslationUnit->generateCompilerOptions(buildOptions.c_str(), internalBuildOptions.c_str()); + EXPECT_THAT(internalOptions, testing::Not(testing::HasSubstr("-cl-intel-greater-than-4GB-buffer-required"))); + } +} + +TEST_F(ModuleTest, whenContainsStatefulAccessIsCalledThenResultIsCorrect) { + class MyModuleImpl : public ModuleImp { + public: + using ModuleImp::ModuleImp; + }; + + std::vector> testParams = { + {false, undefined, undefined}, + {true, 0x40, undefined}, + {true, undefined, 0x40}, + {true, 0x40, 0x40}, + }; + + for (auto &[expectedResult, surfaceStateHeapOffset, crossThreadDataOffset] : testParams) { + auto module = std::make_unique(device, nullptr, ModuleType::User); + ASSERT_NE(nullptr, module); + auto moduleTranslationUnit = module->getTranslationUnit(); + ASSERT_NE(nullptr, moduleTranslationUnit); + auto kernelInfo = std::make_unique(); + kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.clear(); + auto argDescriptor = ArgDescriptor(ArgDescriptor::ArgTPointer); + argDescriptor.as().bindful = surfaceStateHeapOffset; + argDescriptor.as().bindless = crossThreadDataOffset; + kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor); + moduleTranslationUnit->programInfo.kernelInfos.clear(); + moduleTranslationUnit->programInfo.kernelInfos.push_back(kernelInfo.release()); + + EXPECT_EQ(expectedResult, NEO::AddressingModeHelper::containsStatefulAccess(moduleTranslationUnit->programInfo.kernelInfos)); + } +} + +using ModuleInitializeTest = Test; + +TEST_F(ModuleInitializeTest, whenModuleInitializeIsCalledThenCorrectResultIsReturned) { + DebugManagerStateRestore restorer; + class MockModuleImp : public ModuleImp { + public: + using ModuleImp::ModuleImp; + using ModuleImp::translationUnit; + + void setAddressingMode(bool isStateful) { + auto kernelInfo = std::make_unique(); + kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.clear(); + auto argDescriptor = ArgDescriptor(ArgDescriptor::ArgTPointer); + if (isStateful) { + argDescriptor.as().bindful = 0x40; + argDescriptor.as().bindless = 0x40; + } else { + argDescriptor.as().bindful = undefined; + argDescriptor.as().bindless = undefined; + } + kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor); + kernelInfo->heapInfo.KernelHeapSize = 0x1; + kernelInfo->heapInfo.pKernelHeap = reinterpret_cast(0xffff); + + this->translationUnit->programInfo.kernelInfos.clear(); + this->translationUnit->programInfo.kernelInfos.push_back(kernelInfo.release()); + } + }; + + class MyMockModuleTU : public MockModuleTU { + public: + using MockModuleTU::MockModuleTU; + bool createFromNativeBinary(const char *input, size_t inputSize) { return true; } + }; + + std::string testFile; + retrieveBinaryKernelFilenameNoRevision(testFile, "test_kernel_", ".bin"); + size_t size = 0; + auto src = loadDataFromFile(testFile.c_str(), size); + ASSERT_NE(0u, size); + ASSERT_NE(nullptr, src); + ze_module_desc_t moduleDesc = {}; + moduleDesc.format = ZE_MODULE_FORMAT_NATIVE; + moduleDesc.pInputModule = reinterpret_cast(src.get()); + moduleDesc.inputSize = size; + device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 1; + + std::array, 4> testParams = {{{true, false, ModuleType::Builtin, -1}, + {true, true, ModuleType::Builtin, 1}, + {true, true, ModuleType::Builtin, 0}, + {false, true, ModuleType::User, 0}}}; + + for (auto &[expectedResult, isStateful, moduleType, debugKey] : testParams) { + MockModuleImp module(device, nullptr, moduleType); + module.translationUnit = std::make_unique(device); + DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(debugKey); + module.setAddressingMode(isStateful); + EXPECT_EQ(expectedResult, module.initialize(&moduleDesc, device->getNEODevice())); + } +} + using ModuleDebugDataTest = Test; TEST_F(ModuleDebugDataTest, GivenDebugDataWithRelocationsWhenCreatingRelocatedDebugDataThenRelocationsAreApplied) { auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); diff --git a/opencl/source/program/build.cpp b/opencl/source/program/build.cpp index 3627595d77..81a09ba1af 100644 --- a/opencl/source/program/build.cpp +++ b/opencl/source/program/build.cpp @@ -9,6 +9,7 @@ #include "shared/source/device/device.h" #include "shared/source/device_binary_format/device_binary_formats.h" #include "shared/source/execution_environment/execution_environment.h" +#include "shared/source/helpers/addressing_mode_helper.h" #include "shared/source/helpers/compiler_options_parser.h" #include "shared/source/program/kernel_info.h" #include "shared/source/source_level_debugger/source_level_debugger.h" @@ -34,8 +35,7 @@ cl_int Program::build( const char *buildOptions, bool enableCaching) { cl_int retVal = CL_SUCCESS; - std::string internalOptions; - initInternalOptions(internalOptions); + auto defaultClDevice = deviceVector[0]; UNRECOVERABLE_IF(defaultClDevice == nullptr); auto &defaultDevice = defaultClDevice->getDevice(); @@ -69,6 +69,9 @@ cl_int Program::build( } else if (this->createdFrom != CreatedFrom::BINARY) { options = ""; } + std::string internalOptions; + initInternalOptions(internalOptions); + extractInternalOptions(options, internalOptions); applyAdditionalOptions(internalOptions); @@ -170,6 +173,12 @@ cl_int Program::build( phaseReached[clDevice->getRootDeviceIndex()] = BuildPhase::BinaryProcessing; } + const auto &kernelInfoArray = buildInfos[clDevices[0]->getRootDeviceIndex()].kernelInfoArray; + auto sharedSystemAllocationsAllowed = clDevices[0]->areSharedSystemAllocationsAllowed(); + if (AddressingModeHelper::containsStatefulAccess(kernelInfoArray) && AddressingModeHelper::forceToStatelessNeeded(options, CompilerOptions::smallerThan4gbBuffersOnly.str(), sharedSystemAllocationsAllowed) && !isBuiltIn) { + retVal = CL_BUILD_PROGRAM_FAILURE; + } + if (retVal != CL_SUCCESS) { break; } diff --git a/opencl/source/program/program.cpp b/opencl/source/program/program.cpp index c0b847c5b4..dbe1f45985 100644 --- a/opencl/source/program/program.cpp +++ b/opencl/source/program/program.cpp @@ -15,6 +15,7 @@ #include "shared/source/device_binary_format/elf/elf_encoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/device_binary_format/patchtokens_decoder.h" +#include "shared/source/helpers/addressing_mode_helper.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/compiler_options_parser.h" #include "shared/source/helpers/debug_helpers.h" @@ -74,7 +75,8 @@ void Program::initInternalOptions(std::string &internalOptions) const { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::arch32bit); } - if ((isBuiltIn && is32bit) || pClDevice->areSharedSystemAllocationsAllowed() || + auto sharedSystemAllocationsAllowed = clDevices[0]->areSharedSystemAllocationsAllowed(); + if ((isBuiltIn && is32bit) || AddressingModeHelper::forceToStatelessNeeded(options, CompilerOptions::smallerThan4gbBuffersOnly.str(), sharedSystemAllocationsAllowed) || DebugManager.flags.DisableStatelessToStatefulOptimization.get()) { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired); } diff --git a/opencl/test/unit_test/program/program_tests.cpp b/opencl/test/unit_test/program/program_tests.cpp index 140e5d81fc..abb4323e83 100644 --- a/opencl/test/unit_test/program/program_tests.cpp +++ b/opencl/test/unit_test/program/program_tests.cpp @@ -13,6 +13,7 @@ #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/gmm_helper/gmm_helper.h" +#include "shared/source/helpers/addressing_mode_helper.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/hash.h" #include "shared/source/helpers/hw_helper.h" @@ -1653,6 +1654,137 @@ TEST_F(ProgramTests, WhenProgramIsCreatedThenCorrectOclVersionIsInOptions) { } } +TEST_F(ProgramTests, whenForceToStatelessNeededIsCalledThenCorrectResultIsReturned) { + DebugManagerStateRestore restorer; + + class MyMockProgram : public Program { + public: + using Program::options; + using Program::Program; + }; + + MyMockProgram program(pContext, false, toClDeviceVector(*pClDevice)); + auto sharedSystemAllocationsAllowed = pClDevice->areSharedSystemAllocationsAllowed(); + + { + DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(-1); + program.options = ""; + EXPECT_EQ(AddressingModeHelper::forceToStatelessNeeded(program.options, NEO::CompilerOptions::smallerThan4gbBuffersOnly.str(), sharedSystemAllocationsAllowed), sharedSystemAllocationsAllowed); + } + { + DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(-1); + program.options = "-cl-opt-smaller-than-4GB-buffers-only"; + EXPECT_FALSE(AddressingModeHelper::forceToStatelessNeeded(program.options, NEO::CompilerOptions::smallerThan4gbBuffersOnly.str(), sharedSystemAllocationsAllowed)); + } + { + DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0); + program.options = ""; + EXPECT_EQ(AddressingModeHelper::forceToStatelessNeeded(program.options, NEO::CompilerOptions::smallerThan4gbBuffersOnly.str(), sharedSystemAllocationsAllowed), sharedSystemAllocationsAllowed); + } + { + DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0); + program.options = "-cl-opt-smaller-than-4GB-buffers-only"; + EXPECT_EQ(AddressingModeHelper::forceToStatelessNeeded(program.options, NEO::CompilerOptions::smallerThan4gbBuffersOnly.str(), sharedSystemAllocationsAllowed), sharedSystemAllocationsAllowed); + } + { + DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(1); + program.options = ""; + EXPECT_FALSE(AddressingModeHelper::forceToStatelessNeeded(program.options, NEO::CompilerOptions::smallerThan4gbBuffersOnly.str(), sharedSystemAllocationsAllowed)); + } + { + DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(1); + program.options = "-cl-opt-smaller-than-4GB-buffers-only"; + EXPECT_FALSE(AddressingModeHelper::forceToStatelessNeeded(program.options, NEO::CompilerOptions::smallerThan4gbBuffersOnly.str(), sharedSystemAllocationsAllowed)); + } +} + +TEST_F(ProgramTests, whenContainsStatefulAccessIsCalledThenReturnCorrectResult) { + std::vector> testParams = { + {false, undefined, undefined}, + {true, 0x40, undefined}, + {true, undefined, 0x40}, + {true, 0x40, 0x40}, + + }; + + for (auto &[expectedResult, surfaceStateHeapOffset, crossThreadDataOffset] : testParams) { + MockProgram program(pContext, false, toClDeviceVector(*pClDevice)); + auto kernelInfo = std::make_unique(); + kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.clear(); + auto argDescriptor = ArgDescriptor(ArgDescriptor::ArgTPointer); + argDescriptor.as().bindful = surfaceStateHeapOffset; + argDescriptor.as().bindless = crossThreadDataOffset; + + kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor); + program.addKernelInfo(kernelInfo.release(), 0); + + EXPECT_EQ(expectedResult, AddressingModeHelper::containsStatefulAccess(program.buildInfos[0].kernelInfoArray)); + } +} + +TEST_F(ProgramTests, givenStatefulAndStatelessAccessesWhenProgramBuildIsCalledThenCorrectResultIsReturned) { + DebugManagerStateRestore restorer; + + class MyMockProgram : public Program { + public: + using Program::buildInfos; + using Program::createdFrom; + using Program::irBinary; + using Program::irBinarySize; + using Program::isBuiltIn; + using Program::options; + using Program::Program; + using Program::sourceCode; + + void setAddressingMode(bool isStateful) { + auto kernelInfo = std::make_unique(); + kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.clear(); + auto argDescriptor = ArgDescriptor(ArgDescriptor::ArgTPointer); + if (isStateful) { + argDescriptor.as().bindful = 0x40; + argDescriptor.as().bindless = 0x40; + } else { + argDescriptor.as().bindful = undefined; + argDescriptor.as().bindless = undefined; + } + + kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor); + this->buildInfos[0].kernelInfoArray.clear(); + this->buildInfos[0].kernelInfoArray.push_back(kernelInfo.release()); + } + + cl_int processGenBinary(const ClDevice &clDevice) override { + return CL_SUCCESS; + } + }; + + pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 1; + + std::array, 3> testParams = {{{CL_SUCCESS, false, -1}, + {CL_SUCCESS, true, 1}, + {CL_BUILD_PROGRAM_FAILURE, true, 0}}}; + for (auto &[result, isStatefulAccess, debuyKey] : testParams) { + MyMockProgram program(pContext, false, toClDeviceVector(*pClDevice)); + program.isBuiltIn = false; + program.sourceCode = "test_kernel"; + program.createdFrom = Program::CreatedFrom::SOURCE; + program.setAddressingMode(isStatefulAccess); + DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(debuyKey); + EXPECT_EQ(result, program.build(toClDeviceVector(*pClDevice), nullptr, false)); + } + + { + MyMockProgram programWithBuiltIn(pContext, true, toClDeviceVector(*pClDevice)); + programWithBuiltIn.irBinary.reset(new char[16]); + programWithBuiltIn.irBinarySize = 16; + programWithBuiltIn.isBuiltIn = true; + programWithBuiltIn.setAddressingMode(true); + DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0); + pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 1u; + EXPECT_EQ(CL_SUCCESS, programWithBuiltIn.build(toClDeviceVector(*pClDevice), nullptr, false)); + } +} + TEST_F(ProgramTests, GivenForcedClVersionWhenProgramIsCreatedThenCorrectOclOptionIsPresent) { std::pair testedValues[] = { {0, "-ocl-version=120"}, diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config index a1116cee3e..318623e862 100644 --- a/opencl/test/unit_test/test_files/igdrcl.config +++ b/opencl/test/unit_test/test_files/igdrcl.config @@ -112,6 +112,7 @@ ForceCsrFlushing = 0 ForceCsrReprogramming = 0 OmitTimestampPacketDependencies = 0 DisableStatelessToStatefulOptimization = 0 +UseSmallerThan4gbBuffersOnly = -1 DisableConcurrentBlockExecution = 0 UseNoRingFlushesKmdMode = 1 DisableZeroCopyForUseHostPtr = 0 diff --git a/opencl/test/unit_test/test_files/kernel_num_args.cl b/opencl/test/unit_test/test_files/kernel_num_args.cl index 4b2236b833..ca2fe02067 100644 --- a/opencl/test/unit_test/test_files/kernel_num_args.cl +++ b/opencl/test/unit_test/test_files/kernel_num_args.cl @@ -6,4 +6,5 @@ */ __kernel void test(__global float *argGlobal, __read_only image3d_t argImg3D, __constant float *argConst) { + argGlobal[0] = argConst[0]; } diff --git a/opencl/test/unit_test/test_files/test_constant_memory.cl b/opencl/test/unit_test/test_files/test_constant_memory.cl index 3e98f6fa92..4dcc1289ad 100644 --- a/opencl/test/unit_test/test_files/test_constant_memory.cl +++ b/opencl/test/unit_test/test_files/test_constant_memory.cl @@ -10,6 +10,6 @@ __constant uint constant_a[2] = {0xabcd5432u, 0xaabb5533u}; __kernel void test(__global uint *in, __global uint *out) { int i = get_global_id(0); int j = get_global_id(0) % (sizeof(constant_a) / sizeof(constant_a[0])); - + in[0] = 0; out[i] = constant_a[j]; } diff --git a/shared/source/compiler_interface/compiler_options/compiler_options_base.h b/shared/source/compiler_interface/compiler_options/compiler_options_base.h index cd973a0a2a..08890f4621 100644 --- a/shared/source/compiler_interface/compiler_options/compiler_options_base.h +++ b/shared/source/compiler_interface/compiler_options/compiler_options_base.h @@ -13,6 +13,7 @@ namespace NEO { namespace CompilerOptions { static constexpr ConstStringRef greaterThan4gbBuffersRequired = "-cl-intel-greater-than-4GB-buffer-required"; +static constexpr ConstStringRef smallerThan4gbBuffersOnly = "-cl-opt-smaller-than-4GB-buffers-only"; static constexpr ConstStringRef hasBufferOffsetArg = "-cl-intel-has-buffer-offset-arg"; static constexpr ConstStringRef kernelDebugEnable = "-cl-kernel-debug-enable"; static constexpr ConstStringRef arch32bit = "-m32"; diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 44fb321839..600a84aeba 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -215,6 +215,7 @@ DECLARE_DEBUG_VARIABLE(bool, DisableStatelessToStatefulOptimization, false, "Dis DECLARE_DEBUG_VARIABLE(bool, DisableConcurrentBlockExecution, false, "disables concurrent block kernel execution") DECLARE_DEBUG_VARIABLE(bool, UseNoRingFlushesKmdMode, true, "Windows only, passes flag to KMD that informs KMD to not emit any ring buffer flushes.") DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will not share memory with CPU.") +DECLARE_DEBUG_VARIABLE(int32_t, UseSmallerThan4gbBuffersOnly, -1, " -1: default, 0: disabled, 1: enabled. When enabled driver will not force stateless accesses when shared system USM is active") DECLARE_DEBUG_VARIABLE(int32_t, EnableHostPtrTracking, -1, "Enable host ptr tracking: -1 - default platform setting, 0 - disabled, 1 - enabled") DECLARE_DEBUG_VARIABLE(int32_t, MaxHwThreadsPercent, 0, "If not zero then maximum number of used HW threads is capped to max * MaxHwThreadsPercent / 100") DECLARE_DEBUG_VARIABLE(int32_t, MinHwThreadsUnoccupied, 0, "If not zero then maximum number of used HW threads is reduced by MinHwThreadsUnoccupied") diff --git a/shared/source/helpers/CMakeLists.txt b/shared/source/helpers/CMakeLists.txt index 2b85fc01b8..18886c1d27 100644 --- a/shared/source/helpers/CMakeLists.txt +++ b/shared/source/helpers/CMakeLists.txt @@ -7,6 +7,8 @@ set(NEO_CORE_HELPERS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/abort.h + ${CMAKE_CURRENT_SOURCE_DIR}/addressing_mode_helper.h + ${CMAKE_CURRENT_SOURCE_DIR}/addressing_mode_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/address_patch.h ${CMAKE_CURRENT_SOURCE_DIR}/affinity_mask.h ${CMAKE_CURRENT_SOURCE_DIR}/aligned_memory.h diff --git a/shared/source/helpers/addressing_mode_helper.cpp b/shared/source/helpers/addressing_mode_helper.cpp new file mode 100644 index 0000000000..e1fcbaec7b --- /dev/null +++ b/shared/source/helpers/addressing_mode_helper.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/helpers/addressing_mode_helper.h" + +#include "shared/source/compiler_interface/compiler_options/compiler_options_base.h" +#include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/program/kernel_info.h" + +namespace NEO::AddressingModeHelper { + +bool forceToStatelessNeeded(const std::string &options, const std::string &smallerThan4GbBuffersOnlyOption, bool sharedSystemAllocationsAllowed) { + auto preferStateful = false; + if (NEO::CompilerOptions::contains(options, smallerThan4GbBuffersOnlyOption)) { + preferStateful = true; + } + if (NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.get() != -1) { + preferStateful = static_cast(NEO::DebugManager.flags.UseSmallerThan4gbBuffersOnly.get()); + } + auto forceStateless = !preferStateful && sharedSystemAllocationsAllowed; + return forceStateless; +} + +bool containsStatefulAccess(const std::vector &kernelInfos) { + for (const auto &kernelInfo : kernelInfos) { + for (const auto &arg : kernelInfo->kernelDescriptor.payloadMappings.explicitArgs) { + auto isStatefulAccess = arg.is() && + (NEO::isValidOffset(arg.as().bindless) || + NEO::isValidOffset(arg.as().bindful)); + if (isStatefulAccess) { + return true; + } + } + } + return false; +} + +} // namespace NEO::AddressingModeHelper diff --git a/shared/source/helpers/addressing_mode_helper.h b/shared/source/helpers/addressing_mode_helper.h new file mode 100644 index 0000000000..020eb0e09d --- /dev/null +++ b/shared/source/helpers/addressing_mode_helper.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include +#include + +namespace NEO { +struct KernelInfo; + +namespace AddressingModeHelper { +bool forceToStatelessNeeded(const std::string &options, const std::string &smallerThan4GbBuffersOnly, bool sharedSystemAllocationsAllowed); +bool containsStatefulAccess(const std::vector &kernelInfos); + +} // namespace AddressingModeHelper +} // namespace NEO