From 99db73c03442cee84807091a3b0c6a91fc9bf784 Mon Sep 17 00:00:00 2001 From: Kamil Kopryk Date: Fri, 13 May 2022 11:49:25 +0000 Subject: [PATCH] Add debug flag to fail build program with stateful access I've added a debug flag FailBuildProgramWithStatefulAccess which makes it possible to fail build program/module creation with stateful access (except builtins) on PVC and later platforms. Related-To: NEO-6075 Signed-off-by: Kamil Kopryk --- level_zero/core/source/module/module_imp.cpp | 24 +++- level_zero/core/source/module/module_imp.h | 2 +- .../unit_tests/sources/module/test_module.cpp | 129 +++++++++++++++--- opencl/source/program/build.cpp | 13 +- opencl/source/program/program.cpp | 9 +- .../unit_test/built_ins/built_in_tests.cpp | 20 ++- .../test/unit_test/program/program_tests.cpp | 122 ++++++++++++++--- .../test/unit_test/test_files/igdrcl.config | 1 + .../debug_settings/debug_variables_base.inl | 1 + shared/source/helpers/CMakeLists.txt | 2 + .../source/helpers/addressing_mode_helper.cpp | 42 ++++++ .../source/helpers/addressing_mode_helper.h | 20 +++ 12 files changed, 335 insertions(+), 50 deletions(-) create mode 100644 shared/source/helpers/addressing_mode_helper.cpp create mode 100644 shared/source/helpers/addressing_mode_helper.h diff --git a/level_zero/core/source/module/module_imp.cpp b/level_zero/core/source/module/module_imp.cpp index 54e8c9fc99..4c4d571191 100644 --- a/level_zero/core/source/module/module_imp.cpp +++ b/level_zero/core/source/module/module_imp.cpp @@ -16,6 +16,7 @@ #include "shared/source/device_binary_format/elf/elf.h" #include "shared/source/device_binary_format/elf/elf_encoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" +#include "shared/source/helpers/addressing_mode_helper.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/compiler_hw_info_config.h" #include "shared/source/helpers/constants.h" @@ -116,8 +117,9 @@ std::string 
ModuleTranslationUnit::generateCompilerOptions(const char *buildOpti options = buildOptions; } std::string internalOptions = NEO::CompilerOptions::concatenate(internalBuildOptions, BuildOptions::hasBufferOffsetArg); + auto &neoDevice = *device->getNEODevice(); - if (device->getNEODevice()->getDeviceInfo().debuggerActive) { + if (neoDevice.getDeviceInfo().debuggerActive) { if (NEO::SourceLevelDebugger::shouldAppendOptDisable(*device->getSourceLevelDebugger())) { NEO::CompilerOptions::concatenateAppend(options, BuildOptions::optDisable); } @@ -126,8 +128,11 @@ std::string ModuleTranslationUnit::generateCompilerOptions(const char *buildOpti internalOptions = NEO::CompilerOptions::concatenate(internalOptions, BuildOptions::debugKernelEnable); } - if (NEO::DebugManager.flags.DisableStatelessToStatefulOptimization.get() || - device->getNEODevice()->areSharedSystemAllocationsAllowed()) { + const auto &compilerHwInfoConfig = *NEO::CompilerHwInfoConfig::get(neoDevice.getHardwareInfo().platform.eProductFamily); + auto forceToStatelessRequired = compilerHwInfoConfig.isForceToStatelessRequired(); + auto statelessToStatefulOptimizationDisabled = NEO::DebugManager.flags.DisableStatelessToStatefulOptimization.get(); + + if (forceToStatelessRequired || statelessToStatefulOptimizationDisabled) { internalOptions = NEO::CompilerOptions::concatenate(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired); } @@ -527,6 +532,18 @@ bool ModuleImp::initialize(const ze_module_desc_t *desc, NEO::Device *neoDevice) this->updateBuildLog(neoDevice); verifyDebugCapabilities(); + auto &hwInfo = neoDevice->getHardwareInfo(); + auto containsStatefulAccess = NEO::AddressingModeHelper::containsStatefulAccess(translationUnit->programInfo.kernelInfos); + auto isUserKernel = (type == ModuleType::User); + + auto failBuildProgram = containsStatefulAccess && + isUserKernel && + NEO::AddressingModeHelper::failBuildProgramWithStatefulAccess(hwInfo); + + if (failBuildProgram) { + success = 
false; + } + if (false == success) { return false; } @@ -554,7 +571,6 @@ bool ModuleImp::initialize(const ze_module_desc_t *desc, NEO::Device *neoDevice) passDebugData(); } - auto &hwInfo = neoDevice->getHardwareInfo(); auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); if (this->isFullyLinked && this->type == ModuleType::User) { diff --git a/level_zero/core/source/module/module_imp.h b/level_zero/core/source/module/module_imp.h index 77b288ace4..3e61fc12d3 100644 --- a/level_zero/core/source/module/module_imp.h +++ b/level_zero/core/source/module/module_imp.h @@ -126,7 +126,7 @@ struct ModuleImp : public Module { Device *getDevice() const override { return device; } - bool linkBinary(); + MOCKABLE_VIRTUAL bool linkBinary(); bool initialize(const ze_module_desc_t *desc, NEO::Device *neoDevice); diff --git a/level_zero/core/test/unit_tests/sources/module/test_module.cpp b/level_zero/core/test/unit_tests/sources/module/test_module.cpp index cd548a9f95..cb16f05cce 100644 --- a/level_zero/core/test/unit_tests/sources/module/test_module.cpp +++ b/level_zero/core/test/unit_tests/sources/module/test_module.cpp @@ -10,6 +10,8 @@ #include "shared/source/device_binary_format/debug_zebin.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" +#include "shared/source/helpers/addressing_mode_helper.h" +#include "shared/source/helpers/compiler_hw_info_config.h" #include "shared/source/kernel/implicit_args.h" #include "shared/source/program/kernel_info.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" @@ -2079,28 +2081,19 @@ HWTEST_F(ModuleTranslationUnitTest, WhenBuildOptionsAreNullThenReuseExistingOpti EXPECT_NE(pMockCompilerInterface->inputInternalOptions.find("cl-intel-greater-than-4GB-buffer-required"), std::string::npos); } -HWTEST_F(ModuleTranslationUnitTest, givenSystemSharedAllocationAllowedWhenBuildingModuleThen4GbBuffersAreRequired) { +HWTEST_F(ModuleTranslationUnitTest, 
givenForceToStatelessRequiredWhenBuildingModuleThen4GbBuffersAreRequired) { auto mockCompilerInterface = new MockCompilerInterface; auto &rootDeviceEnvironment = neoDevice->executionEnvironment->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]; rootDeviceEnvironment->compilerInterface.reset(mockCompilerInterface); - { - neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 1; - - MockModuleTranslationUnit moduleTu(device); - auto ret = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr); - EXPECT_TRUE(ret); + MockModuleTranslationUnit moduleTu(device); + auto ret = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr); + EXPECT_TRUE(ret); + const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily); + if (compilerHwInfoConfig.isForceToStatelessRequired()) { EXPECT_NE(mockCompilerInterface->inputInternalOptions.find("cl-intel-greater-than-4GB-buffer-required"), std::string::npos); - } - - { - neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 0; - - MockModuleTranslationUnit moduleTu(device); - auto ret = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr); - EXPECT_TRUE(ret); - + } else { EXPECT_EQ(mockCompilerInterface->inputInternalOptions.find("cl-intel-greater-than-4GB-buffer-required"), std::string::npos); } } @@ -2308,6 +2301,110 @@ TEST_F(ModuleTest, GivenInjectInternalBuildOptionsWhenBuildingBuiltinModuleThenI EXPECT_FALSE(CompilerOptions::contains(cip->buildInternalOptions, "-abc")); }; +TEST_F(ModuleTest, whenContainsStatefulAccessIsCalledThenResultIsCorrect) { + class MyModuleImpl : public ModuleImp { + public: + using ModuleImp::ModuleImp; + }; + + std::vector> testParams = { + {false, undefined, undefined}, + {true, 0x40, undefined}, + {true, undefined, 0x40}, + {true, 0x40, 0x40}, + }; + + for (auto &[expectedResult, surfaceStateHeapOffset, crossThreadDataOffset] : testParams) { + auto 
module = std::make_unique(device, nullptr, ModuleType::User); + ASSERT_NE(nullptr, module); + auto moduleTranslationUnit = module->getTranslationUnit(); + ASSERT_NE(nullptr, moduleTranslationUnit); + auto kernelInfo = std::make_unique(); + kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.clear(); + auto argDescriptor = ArgDescriptor(ArgDescriptor::ArgTPointer); + argDescriptor.as().bindful = surfaceStateHeapOffset; + argDescriptor.as().bindless = crossThreadDataOffset; + kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor); + moduleTranslationUnit->programInfo.kernelInfos.clear(); + moduleTranslationUnit->programInfo.kernelInfos.push_back(kernelInfo.release()); + + EXPECT_EQ(expectedResult, NEO::AddressingModeHelper::containsStatefulAccess(moduleTranslationUnit->programInfo.kernelInfos)); + } +} + +using ModuleInitializeTest = Test; + +TEST_F(ModuleInitializeTest, whenModuleInitializeIsCalledThenCorrectResultIsReturned) { + class MockModuleImp : public ModuleImp { + public: + using ModuleImp::isFullyLinked; + using ModuleImp::ModuleImp; + using ModuleImp::translationUnit; + + bool linkBinary() override { + return true; + } + + void setAddressingMode(bool isStateful) { + auto kernelInfo = std::make_unique(); + kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.clear(); + auto argDescriptor = ArgDescriptor(ArgDescriptor::ArgTPointer); + if (isStateful) { + argDescriptor.as().bindful = 0x40; + argDescriptor.as().bindless = 0x40; + } else { + argDescriptor.as().bindful = undefined; + argDescriptor.as().bindless = undefined; + } + kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor); + kernelInfo->heapInfo.KernelHeapSize = 0x1; + kernelInfo->heapInfo.pKernelHeap = reinterpret_cast(0xffff); + + this->translationUnit->programInfo.kernelInfos.clear(); + this->translationUnit->programInfo.kernelInfos.push_back(kernelInfo.release()); + } + }; + + class MyMockModuleTU : public MockModuleTU { + public: 
+ using MockModuleTU::MockModuleTU; + bool createFromNativeBinary(const char *input, size_t inputSize) override { return true; } + }; + + const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily); + if (!compilerHwInfoConfig.isForceToStatelessRequired()) { + GTEST_SKIP(); + } + + DebugManagerStateRestore restorer; + std::string testFile; + retrieveBinaryKernelFilenameApiSpecific(testFile, "test_kernel_", ".bin"); + size_t size = 0; + auto src = loadDataFromFile(testFile.c_str(), size); + ASSERT_NE(0u, size); + ASSERT_NE(nullptr, src); + ze_module_desc_t moduleDesc = {}; + moduleDesc.format = ZE_MODULE_FORMAT_NATIVE; + moduleDesc.pInputModule = reinterpret_cast(src.get()); + moduleDesc.inputSize = size; + + std::array, 5> testParams = {{ + {true, false, ModuleType::Builtin, -1}, + {true, true, ModuleType::Builtin, 0}, + {true, true, ModuleType::User, 0}, + {true, true, ModuleType::Builtin, 1}, + {false, true, ModuleType::User, 1}, + }}; + + for (auto &[expectedResult, isStateful, moduleType, debugKey] : testParams) { + MockModuleImp module(device, nullptr, moduleType); + module.translationUnit = std::make_unique(device); + DebugManager.flags.FailBuildProgramWithStatefulAccess.set(debugKey); + module.setAddressingMode(isStateful); + EXPECT_EQ(expectedResult, module.initialize(&moduleDesc, device->getNEODevice())); + } +} + using ModuleDebugDataTest = Test; TEST_F(ModuleDebugDataTest, GivenDebugDataWithRelocationsWhenCreatingRelocatedDebugDataThenRelocationsAreApplied) { auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); diff --git a/opencl/source/program/build.cpp b/opencl/source/program/build.cpp index 3099616f31..c28b170704 100644 --- a/opencl/source/program/build.cpp +++ b/opencl/source/program/build.cpp @@ -10,6 +10,7 @@ #include "shared/source/device/device.h" #include "shared/source/device_binary_format/device_binary_formats.h" #include "shared/source/execution_environment/execution_environment.h" 
+#include "shared/source/helpers/addressing_mode_helper.h" #include "shared/source/helpers/compiler_options_parser.h" #include "shared/source/program/kernel_info.h" #include "shared/source/source_level_debugger/source_level_debugger.h" @@ -177,12 +178,22 @@ cl_int Program::build( phaseReached[clDevice->getRootDeviceIndex()] = BuildPhase::BinaryProcessing; } + auto containsStatefulAccess = AddressingModeHelper::containsStatefulAccess(buildInfos[clDevices[0]->getRootDeviceIndex()].kernelInfoArray); + auto isUserKernel = !isBuiltIn; + + auto failBuildProgram = (containsStatefulAccess && + isUserKernel && + AddressingModeHelper::failBuildProgramWithStatefulAccess(clDevices[0]->getHardwareInfo())); + + if (failBuildProgram) { + retVal = CL_BUILD_PROGRAM_FAILURE; + } + if (retVal != CL_SUCCESS) { break; } if (isKernelDebugEnabled() || gtpinIsGTPinInitialized()) { - for (auto &clDevice : deviceVector) { auto rootDeviceIndex = clDevice->getRootDeviceIndex(); if (BuildPhase::DebugDataNotification == phaseReached[rootDeviceIndex]) { diff --git a/opencl/source/program/program.cpp b/opencl/source/program/program.cpp index b005f14278..649f727235 100644 --- a/opencl/source/program/program.cpp +++ b/opencl/source/program/program.cpp @@ -73,8 +73,12 @@ std::string Program::getInternalOptions() const { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::arch32bit); } - if ((isBuiltIn && is32bit) || pClDevice->areSharedSystemAllocationsAllowed() || - DebugManager.flags.DisableStatelessToStatefulOptimization.get()) { + auto &hwInfo = pClDevice->getHardwareInfo(); + const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(hwInfo.platform.eProductFamily); + auto forceToStatelessRequired = compilerHwInfoConfig.isForceToStatelessRequired(); + auto disableStatelessToStatefulOptimization = DebugManager.flags.DisableStatelessToStatefulOptimization.get(); + + if ((isBuiltIn && is32bit) || forceToStatelessRequired || disableStatelessToStatefulOptimization) { 
CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired); } @@ -91,7 +95,6 @@ std::string Program::getInternalOptions() const { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::hasBufferOffsetArg); } - auto &hwInfo = pClDevice->getHardwareInfo(); const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); if (hwInfoConfig.isForceEmuInt32DivRemSPWARequired(hwInfo)) { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::forceEmuInt32DivRemSP); diff --git a/opencl/test/unit_test/built_ins/built_in_tests.cpp b/opencl/test/unit_test/built_ins/built_in_tests.cpp index fae9865979..f419f39184 100644 --- a/opencl/test/unit_test/built_ins/built_in_tests.cpp +++ b/opencl/test/unit_test/built_ins/built_in_tests.cpp @@ -257,10 +257,10 @@ TEST_F(BuiltInTests, WhenBuildingListOfBuiltinsThenBuiltinsHaveBeenGenerated) { uint64_t hash = Hash::hash(allBuiltIns.c_str(), allBuiltIns.length()); auto hashName = getBuiltInHashFileName(hash, supportsImages); - //First fail, if we are inconsistent + // First fail, if we are inconsistent EXPECT_EQ(true, fileExists(hashName)) << "**********\nBuilt in kernels need to be regenerated for the mock compilers!\n**********"; - //then write to file if needed + // then write to file if needed #define GENERATE_NEW_HASH_FOR_BUILT_INS 0 #if GENERATE_NEW_HASH_FOR_BUILT_INS std::cout << "writing builtins to file: " << hashName << std::endl; @@ -1527,17 +1527,21 @@ TEST_F(BuiltInTests, GivenTypeSourceWhenCreatingProgramFromCodeThenValidPointerI EXPECT_NE(nullptr, program.get()); } -TEST_F(BuiltInTests, givenCreateProgramFromSourceWhenDeviceSupportSharedSystemAllocationThenInternalOptionsDisableStosoFlag) { +TEST_F(BuiltInTests, givenCreateProgramFromSourceWhenForceToStatelessRequiredOr32BitThenInternalOptionsHasGreaterThan4gbBuffersRequired) { auto builtinsLib = std::unique_ptr(new BuiltinsLib()); - pClDevice->deviceInfo.sharedSystemMemCapabilities = 
CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL; - pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 1; const BuiltinCode bc = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Source, *pDevice); EXPECT_NE(0u, bc.resource.size()); auto program = std::unique_ptr(BuiltinDispatchInfoBuilder::createProgramFromCode(bc, toClDeviceVector(*pClDevice))); EXPECT_NE(nullptr, program.get()); auto builtinInternalOptions = program->getInternalOptions(); - EXPECT_TRUE(hasSubstr(builtinInternalOptions, std::string(CompilerOptions::greaterThan4gbBuffersRequired))); + + const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily); + if (compilerHwInfoConfig.isForceToStatelessRequired() || is32bit) { + EXPECT_THAT(builtinInternalOptions, testing::HasSubstr(std::string(CompilerOptions::greaterThan4gbBuffersRequired))); + } else { + EXPECT_THAT(builtinInternalOptions, testing::Not(testing::HasSubstr(std::string(CompilerOptions::greaterThan4gbBuffersRequired)))); + } } TEST_F(BuiltInTests, GivenTypeIntermediateWhenCreatingProgramFromCodeThenNullPointerIsReturned) { @@ -1586,7 +1590,9 @@ TEST_F(BuiltInTests, GivenForce32bitWhenCreatingProgramThenCorrectKernelIsCreate EXPECT_EQ(std::string::npos, it); it = builtinInternalOptions.find(NEO::CompilerOptions::greaterThan4gbBuffersRequired.data()); - if (is32bit || pDevice->areSharedSystemAllocationsAllowed()) { + const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily); + + if (is32bit || compilerHwInfoConfig.isForceToStatelessRequired()) { EXPECT_NE(std::string::npos, it); } else { EXPECT_EQ(std::string::npos, it); diff --git a/opencl/test/unit_test/program/program_tests.cpp 
b/opencl/test/unit_test/program/program_tests.cpp index 32d726edc6..5c41a84e30 100644 --- a/opencl/test/unit_test/program/program_tests.cpp +++ b/opencl/test/unit_test/program/program_tests.cpp @@ -14,7 +14,9 @@ #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/gmm_helper/gmm_helper.h" +#include "shared/source/helpers/addressing_mode_helper.h" #include "shared/source/helpers/aligned_memory.h" +#include "shared/source/helpers/compiler_hw_info_config.h" #include "shared/source/helpers/hash.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/ptr_math.h" @@ -1140,7 +1142,9 @@ TEST_F(ProgramFromSourceTest, GivenFlagsWhenCompilingProgramThenBuildOptionsHave // Check build options that were applied EXPECT_TRUE(CompilerOptions::contains(cip->buildOptions, CompilerOptions::fastRelaxedMath)) << cip->buildOptions; EXPECT_FALSE(CompilerOptions::contains(cip->buildInternalOptions, CompilerOptions::gtpinRera)) << cip->buildInternalOptions; - if (!pDevice->areSharedSystemAllocationsAllowed()) { + + const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily); + if (!compilerHwInfoConfig.isForceToStatelessRequired()) { EXPECT_FALSE(CompilerOptions::contains(cip->buildInternalOptions, CompilerOptions::greaterThan4gbBuffersRequired)) << cip->buildInternalOptions; } EXPECT_TRUE(CompilerOptions::contains(cip->buildInternalOptions, pPlatform->getClDevice(0)->peekCompilerExtensions())) << cip->buildInternalOptions; @@ -1730,14 +1734,6 @@ TEST_F(ProgramTests, WhenCreatingProgramThenBindlessIsEnabledOnlyIfDebugFlagIsEn } } -TEST_F(ProgramTests, givenDeviceThatSupportsSharedSystemMemoryAllocationWhenProgramIsCompiledThenItForcesStatelessCompilation) { - pClDevice->deviceInfo.sharedSystemMemCapabilities = CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL | 
CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL; - pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 1; - MockProgram program(pContext, false, toClDeviceVector(*pClDevice)); - auto internalOptions = program.getInternalOptions(); - EXPECT_TRUE(CompilerOptions::contains(internalOptions.c_str(), CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions; -} - TEST_F(ProgramTests, GivenForce32BitAddressessWhenProgramIsCreatedThenGreaterThan4gbBuffersRequiredIsCorrectlySet) { DebugManagerStateRestore dbgRestorer; cl_int retVal = CL_DEVICE_NOT_FOUND; @@ -1746,7 +1742,8 @@ TEST_F(ProgramTests, GivenForce32BitAddressessWhenProgramIsCreatedThenGreaterTha const_cast(&pDevice->getDeviceInfo())->force32BitAddressess = true; MockProgram program(pContext, false, toClDeviceVector(*pClDevice)); auto internalOptions = program.getInternalOptions(); - if (pDevice->areSharedSystemAllocationsAllowed()) { + const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily); + if (compilerHwInfoConfig.isForceToStatelessRequired()) { EXPECT_TRUE(CompilerOptions::contains(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions; } else { EXPECT_FALSE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions; @@ -1761,10 +1758,12 @@ TEST_F(ProgramTests, Given32bitSupportWhenProgramIsCreatedThenGreaterThan4gbBuff DebugManager.flags.DisableStatelessToStatefulOptimization.set(false); std::unique_ptr program{Program::createBuiltInFromSource("", pContext, pContext->getDevices(), nullptr)}; auto internalOptions = program->getInternalOptions(); - if ((false == pDevice->areSharedSystemAllocationsAllowed()) && (false == is32bit)) { - EXPECT_FALSE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << 
internalOptions; - } else { + const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily); + + if (compilerHwInfoConfig.isForceToStatelessRequired() || is32bit) { EXPECT_TRUE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions; + } else { + EXPECT_FALSE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions; } } @@ -1788,14 +1787,101 @@ TEST_F(ProgramTests, Force32BitAddressessWhenProgramIsCreatedThenGreaterThan4gbB const_cast(&pDevice->getDeviceInfo())->force32BitAddressess = true; std::unique_ptr program{Program::createBuiltInFromSource("", pContext, pContext->getDevices(), nullptr)}; auto internalOptions = program->getInternalOptions(); - if (is32bit) { + const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily); + if (is32bit || compilerHwInfoConfig.isForceToStatelessRequired()) { EXPECT_TRUE(CompilerOptions::contains(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions; } else { - if (false == pDevice->areSharedSystemAllocationsAllowed()) { - EXPECT_FALSE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions; - } else { - EXPECT_TRUE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions; + EXPECT_FALSE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions; + } +} + +TEST_F(ProgramTests, whenContainsStatefulAccessIsCalledThenReturnCorrectResult) { + std::vector> testParams = { + {false, undefined, undefined}, + {true, 0x40, undefined}, + {true, undefined, 0x40}, + {true, 0x40, 0x40}, + + }; + + for (auto &[expectedResult, surfaceStateHeapOffset, crossThreadDataOffset] : testParams) { + MockProgram program(pContext, false, 
toClDeviceVector(*pClDevice)); + auto kernelInfo = std::make_unique(); + kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.clear(); + auto argDescriptor = ArgDescriptor(ArgDescriptor::ArgTPointer); + argDescriptor.as().bindful = surfaceStateHeapOffset; + argDescriptor.as().bindless = crossThreadDataOffset; + + kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor); + program.addKernelInfo(kernelInfo.release(), 0); + + EXPECT_EQ(expectedResult, AddressingModeHelper::containsStatefulAccess(program.buildInfos[0].kernelInfoArray)); + } +} + +TEST_F(ProgramTests, givenStatefulAndStatelessAccessesWhenProgramBuildIsCalledThenCorrectResultIsReturned) { + DebugManagerStateRestore restorer; + const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(pClDevice->getHardwareInfo().platform.eProductFamily); + + class MyMockProgram : public Program { + public: + using Program::buildInfos; + using Program::createdFrom; + using Program::irBinary; + using Program::irBinarySize; + using Program::isBuiltIn; + using Program::options; + using Program::Program; + using Program::sourceCode; + + void setAddressingMode(bool isStateful) { + auto kernelInfo = std::make_unique(); + kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.clear(); + auto argDescriptor = ArgDescriptor(ArgDescriptor::ArgTPointer); + if (isStateful) { + argDescriptor.as().bindful = 0x40; + argDescriptor.as().bindless = 0x40; + } else { + argDescriptor.as().bindful = undefined; + argDescriptor.as().bindless = undefined; + } + + kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor); + this->buildInfos[0].kernelInfoArray.clear(); + this->buildInfos[0].kernelInfoArray.push_back(kernelInfo.release()); } + + cl_int processGenBinary(const ClDevice &clDevice) override { + return CL_SUCCESS; + } + }; + + std::array, 3> testParams = {{{CL_SUCCESS, false, -1}, + {CL_SUCCESS, true, 0}, + {CL_BUILD_PROGRAM_FAILURE, true, 1}}}; + + for (auto &[result, 
isStatefulAccess, debuyKey] : testParams) { + + if (!compilerHwInfoConfig.isForceToStatelessRequired()) { + result = CL_SUCCESS; + } + MyMockProgram program(pContext, false, toClDeviceVector(*pClDevice)); + program.isBuiltIn = false; + program.sourceCode = "test_kernel"; + program.createdFrom = Program::CreatedFrom::SOURCE; + program.setAddressingMode(isStatefulAccess); + DebugManager.flags.FailBuildProgramWithStatefulAccess.set(debuyKey); + EXPECT_EQ(result, program.build(toClDeviceVector(*pClDevice), nullptr, false)); + } + + { + MyMockProgram programWithBuiltIn(pContext, true, toClDeviceVector(*pClDevice)); + programWithBuiltIn.isBuiltIn = true; + programWithBuiltIn.irBinary.reset(new char[16]); + programWithBuiltIn.irBinarySize = 16; + programWithBuiltIn.setAddressingMode(true); + DebugManager.flags.FailBuildProgramWithStatefulAccess.set(1); + EXPECT_EQ(CL_SUCCESS, programWithBuiltIn.build(toClDeviceVector(*pClDevice), nullptr, false)); } } diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config index 502fafe24b..5b3dece925 100644 --- a/opencl/test/unit_test/test_files/igdrcl.config +++ b/opencl/test/unit_test/test_files/igdrcl.config @@ -422,3 +422,4 @@ DirectSubmissionInsertSfenceInstructionPriorToSubmission = -1 EnableTimestampWaitForEvents = -1 ForceWddmLowPriorityContextValue = -1 EnableDebuggerMmapMemoryAccess = 0 +FailBuildProgramWithStatefulAccess = -1 \ No newline at end of file diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index fde48eef0c..d842c047ac 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -203,6 +203,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, SetVmAdviseAtomicAttribute, -1, "-1: default - a DECLARE_DEBUG_VARIABLE(int32_t, ReadBackCommandBufferAllocation, -1, "Read command buffer allocation back on the host side. 
-1: default, 0 - disabled, 1 - local memory only, 2 - local and system memory") DECLARE_DEBUG_VARIABLE(int32_t, UseContextEndOffsetForEventCompletion, -1, "Use Context End or Context Start for event completion signalling. -1: default: platform dependent, 0 - Use Context Start, 1 - Use Context End") DECLARE_DEBUG_VARIABLE(int32_t, ForceWddmLowPriorityContextValue, -1, "Force scheduling priority value during Wddm low priority context creation. -1 - default.") +DECLARE_DEBUG_VARIABLE(int32_t, FailBuildProgramWithStatefulAccess, -1, "-1: default, 0: disable, 1: enable, Fail build program/module creation whenever stateful access is discovered (except built in kernels).") DECLARE_DEBUG_VARIABLE(bool, DisableScratchPages, false, "Disable scratch pages during VM creations") /*LOGGING FLAGS*/ DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level") diff --git a/shared/source/helpers/CMakeLists.txt b/shared/source/helpers/CMakeLists.txt index ba45490538..154586972d 100644 --- a/shared/source/helpers/CMakeLists.txt +++ b/shared/source/helpers/CMakeLists.txt @@ -8,6 +8,8 @@ set(NEO_CORE_HELPERS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/abort.h ${CMAKE_CURRENT_SOURCE_DIR}/address_patch.h + ${CMAKE_CURRENT_SOURCE_DIR}/addressing_mode_helper.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/addressing_mode_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/affinity_mask.h ${CMAKE_CURRENT_SOURCE_DIR}/aligned_memory.h ${CMAKE_CURRENT_SOURCE_DIR}/app_resource_defines.h diff --git a/shared/source/helpers/addressing_mode_helper.cpp b/shared/source/helpers/addressing_mode_helper.cpp new file mode 100644 index 0000000000..e6e7e7930a --- /dev/null +++ b/shared/source/helpers/addressing_mode_helper.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "addressing_mode_helper.h" + +#include 
"shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/helpers/compiler_hw_info_config.h" +#include "shared/source/program/kernel_info.h" + +namespace NEO::AddressingModeHelper { + +bool failBuildProgramWithStatefulAccess(const HardwareInfo &hwInfo) { + auto failBuildProgram = false; + if (NEO::DebugManager.flags.FailBuildProgramWithStatefulAccess.get() != -1) { + failBuildProgram = static_cast(NEO::DebugManager.flags.FailBuildProgramWithStatefulAccess.get()); + } + + const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(hwInfo.platform.eProductFamily); + auto forceToStatelessRequired = compilerHwInfoConfig.isForceToStatelessRequired(); + + return failBuildProgram && forceToStatelessRequired; +} + +bool containsStatefulAccess(const std::vector &kernelInfos) { + for (const auto &kernelInfo : kernelInfos) { + for (const auto &arg : kernelInfo->kernelDescriptor.payloadMappings.explicitArgs) { + auto isStatefulAccess = arg.is() && + (NEO::isValidOffset(arg.as().bindless) || + NEO::isValidOffset(arg.as().bindful)); + if (isStatefulAccess) { + return true; + } + } + } + return false; +} + +} // namespace NEO::AddressingModeHelper diff --git a/shared/source/helpers/addressing_mode_helper.h b/shared/source/helpers/addressing_mode_helper.h new file mode 100644 index 0000000000..79dd6b4225 --- /dev/null +++ b/shared/source/helpers/addressing_mode_helper.h @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include + +namespace NEO { +struct KernelInfo; +struct HardwareInfo; + +namespace AddressingModeHelper { +bool failBuildProgramWithStatefulAccess(const HardwareInfo &hwInfo); +bool containsStatefulAccess(const std::vector &kernelInfos); + +} // namespace AddressingModeHelper +} // namespace NEO