From faea7915c2047aaf463c24c1a6904a1c4af1c1c4 Mon Sep 17 00:00:00 2001 From: Compute-Runtime-Validation Date: Fri, 5 Nov 2021 20:04:32 +0100 Subject: [PATCH] Revert "Fail build program in shared system USM + statefull access case OCL" This reverts commit 9dabc2db0c3cbecb838a97d000d9385532f44c5a. Signed-off-by: Compute-Runtime-Validation --- opencl/source/program/build.cpp | 10 +- opencl/source/program/program.cpp | 33 +------ opencl/source/program/program.h | 5 - opencl/test/unit_test/mocks/mock_program.h | 1 - .../test/unit_test/program/program_tests.cpp | 99 ------------------- .../test/unit_test/test_files/igdrcl.config | 1 - .../unit_test/test_files/kernel_num_args.cl | 1 - .../test_files/test_constant_memory.cl | 2 +- .../compiler_options/compiler_options_base.h | 1 - .../debug_settings/debug_variables_base.inl | 1 - .../helpers/compiler_hw_info_config_base.inl | 4 + .../compiler_hw_info_config_bdw_and_later.inl | 5 - 12 files changed, 8 insertions(+), 155 deletions(-) diff --git a/opencl/source/program/build.cpp b/opencl/source/program/build.cpp index 4de6c5b1eb..ec492a794f 100644 --- a/opencl/source/program/build.cpp +++ b/opencl/source/program/build.cpp @@ -34,7 +34,8 @@ cl_int Program::build( const char *buildOptions, bool enableCaching) { cl_int retVal = CL_SUCCESS; - + std::string internalOptions; + initInternalOptions(internalOptions); auto defaultClDevice = deviceVector[0]; UNRECOVERABLE_IF(defaultClDevice == nullptr); auto &defaultDevice = defaultClDevice->getDevice(); @@ -68,9 +69,6 @@ cl_int Program::build( } else if (this->createdFrom != CreatedFrom::BINARY) { options = ""; } - std::string internalOptions; - initInternalOptions(internalOptions); - extractInternalOptions(options, internalOptions); applyAdditionalOptions(internalOptions); @@ -168,10 +166,6 @@ cl_int Program::build( phaseReached[clDevice->getRootDeviceIndex()] = BuildPhase::BinaryProcessing; } - if (containsStatefulAccess(defaultDevice.getRootDeviceIndex()) && forceToStatelessNeeded() && !isBuiltIn) { - retVal = CL_BUILD_PROGRAM_FAILURE; - } - if (retVal != CL_SUCCESS) { break; } diff --git a/opencl/source/program/program.cpp b/opencl/source/program/program.cpp index 7dd07d6297..c0b847c5b4 100644 --- a/opencl/source/program/program.cpp +++ b/opencl/source/program/program.cpp @@ -66,7 +66,6 @@ Program::Program(Context *context, bool isBuiltIn, const ClDeviceVector &clDevic kernelDebugEnabled = clDevices[0]->isDebuggerActive(); } void Program::initInternalOptions(std::string &internalOptions) const { - auto pClDevice = clDevices[0]; auto force32BitAddressess = pClDevice->getSharedDeviceInfo().force32BitAddressess; internalOptions = getOclVersionCompilerInternalOption(pClDevice->getEnabledClVersion()); @@ -75,7 +74,7 @@ void Program::initInternalOptions(std::string &internalOptions) const { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::arch32bit); } - if ((isBuiltIn && is32bit) || forceToStatelessNeeded() || + if ((isBuiltIn && is32bit) || pClDevice->areSharedSystemAllocationsAllowed() || DebugManager.flags.DisableStatelessToStatefulOptimization.get()) { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired); } @@ -137,20 +136,6 @@ Program::~Program() { } } -bool Program::forceToStatelessNeeded() const { - auto preferStateful = false; - if (auto it = options.find(NEO::CompilerOptions::smallerThan4gbBuffersOnly.data()); it != std::string::npos) { - preferStateful = true; - } - - if (DebugManager.flags.UseSmallerThan4gbBuffersOnly.get() != -1) { - preferStateful = static_cast(DebugManager.flags.UseSmallerThan4gbBuffersOnly.get()); - } - - auto forceStateless = !preferStateful && clDevices[0]->areSharedSystemAllocationsAllowed(); - return forceStateless; -} - cl_int Program::createProgramFromBinary( const void *pBinary, size_t binarySize, ClDevice &clDevice) { @@ -504,22 +489,6 @@ cl_int Program::packDeviceBinary(ClDevice &clDevice) { return CL_SUCCESS; } -bool Program::containsStatefulAccess(uint32_t rootDeviceIndex) const { - auto &buildInfo = buildInfos[rootDeviceIndex]; - for (const auto &kernelInfo : buildInfo.kernelInfoArray) { - for (const auto &arg : kernelInfo->kernelDescriptor.payloadMappings.explicitArgs) { - auto isStatefulAccess = arg.is() && - (isValidOffset(arg.as().bindless) || - isValidOffset(arg.as().bindful)); - if (isStatefulAccess) { - return true; - } - } - } - - return false; -} - void Program::setBuildStatus(cl_build_status status) { for (auto &deviceBuildInfo : deviceBuildInfos) { deviceBuildInfo.second.buildStatus = status; diff --git a/opencl/source/program/program.h b/opencl/source/program/program.h index 9182e6548e..946202411e 100644 --- a/opencl/source/program/program.h +++ b/opencl/source/program/program.h @@ -283,10 +283,6 @@ class Program : public BaseObject<_cl_program> { } protected: - bool forceToStatelessNeeded() const; - - MOCKABLE_VIRTUAL bool containsStatefulAccess(uint32_t rootDeviceIndex) const; - MOCKABLE_VIRTUAL cl_int createProgramFromBinary(const void *pBinary, size_t binarySize, ClDevice &clDevice); cl_int packDeviceBinary(ClDevice &clDevice); @@ -370,7 +366,6 @@ class Program : public BaseObject<_cl_program> { bool isBuiltIn = false; bool kernelDebugEnabled = false; - bool containsStatefulAccesses = false; uint32_t maxRootDeviceIndex = std::numeric_limits::max(); std::mutex lockMutex; uint32_t exposedKernels = 0; diff --git a/opencl/test/unit_test/mocks/mock_program.h b/opencl/test/unit_test/mocks/mock_program.h index 7e1d98be1e..fe27202468 100644 --- a/opencl/test/unit_test/mocks/mock_program.h +++ b/opencl/test/unit_test/mocks/mock_program.h @@ -40,7 +40,6 @@ class MockProgram : public Program { using Program::debugDataSize; using Program::deviceBuildInfos; using Program::extractInternalOptions; - using Program::forceToStatelessNeeded; using Program::getKernelInfo; using Program::internalOptionsToExtract; using Program::irBinary; diff --git a/opencl/test/unit_test/program/program_tests.cpp b/opencl/test/unit_test/program/program_tests.cpp index 6023223cb8..26ccb0b52f 100644 --- a/opencl/test/unit_test/program/program_tests.cpp +++ b/opencl/test/unit_test/program/program_tests.cpp @@ -1653,105 +1653,6 @@ TEST_F(ProgramTests, WhenProgramIsCreatedThenCorrectOclVersionIsInOptions) { } } -TEST_F(ProgramTests, whenForceToStatelessNeededIsCalledThenCorrectResultIsReturned) { - DebugManagerStateRestore restorer; - - class MyMockProgram : public Program { - public: - using Program::forceToStatelessNeeded; - using Program::options; - using Program::Program; - }; - - MyMockProgram program(pContext, false, toClDeviceVector(*pClDevice)); - - { - DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(-1); - program.options = ""; - EXPECT_EQ(program.forceToStatelessNeeded(), pClDevice->areSharedSystemAllocationsAllowed()); - } - { - DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(-1); - program.options = "-cl-opt-smaller-than-4GB-buffers-only"; - EXPECT_FALSE(program.forceToStatelessNeeded()); - } - { - DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0); - program.options = ""; - EXPECT_EQ(program.forceToStatelessNeeded(), pClDevice->areSharedSystemAllocationsAllowed()); - } - { - DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0); - program.options = "-cl-opt-smaller-than-4GB-buffers-only"; - EXPECT_EQ(program.forceToStatelessNeeded(), pClDevice->areSharedSystemAllocationsAllowed()); - } - { - DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(1); - program.options = ""; - EXPECT_FALSE(program.forceToStatelessNeeded()); - } - { - DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(1); - program.options = "-cl-opt-smaller-than-4GB-buffers-only"; - EXPECT_FALSE(program.forceToStatelessNeeded()); - } -} - -TEST_F(ProgramTests, givenStatefulAndStatelessAccessesWhenProgramBuildIsCalledThenCorrectResultIsReturned) { - DebugManagerStateRestore restorer; - - class MyMockProgram : public Program { - public: - using Program::containsStatefulAccess; - using Program::createdFrom; - using Program::irBinary; - using Program::irBinarySize; - using Program::isBuiltIn; - using Program::options; - using Program::Program; - using Program::sourceCode; - - bool containsStatefulAccess(uint32_t rootDeviceIndex) const override { - return hasStatefulAccess; - } - - bool hasStatefulAccess = false; - }; - - MyMockProgram program(pContext, false, toClDeviceVector(*pClDevice)); - program.isBuiltIn = false; - program.sourceCode = "test_kernel"; - program.createdFrom = Program::CreatedFrom::SOURCE; - - { - EXPECT_EQ(CL_SUCCESS, program.build(toClDeviceVector(*pClDevice), nullptr, false)); - } - { - program.hasStatefulAccess = true; - DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(1); - EXPECT_EQ(CL_SUCCESS, program.build(toClDeviceVector(*pClDevice), nullptr, false)); - } - { - program.hasStatefulAccess = true; - DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0); - if (pClDevice->areSharedSystemAllocationsAllowed()) { - EXPECT_EQ(CL_BUILD_PROGRAM_FAILURE, program.build(toClDeviceVector(*pClDevice), nullptr, false)); - } else { - EXPECT_EQ(CL_SUCCESS, program.build(toClDeviceVector(*pClDevice), nullptr, false)); - } - } - { - MyMockProgram programWithBuiltIn(pContext, true, toClDeviceVector(*pClDevice)); - programWithBuiltIn.irBinary.reset(new char[16]); - programWithBuiltIn.irBinarySize = 16; - - programWithBuiltIn.isBuiltIn = true; - programWithBuiltIn.hasStatefulAccess = true; - DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0); - EXPECT_EQ(CL_SUCCESS, programWithBuiltIn.build(toClDeviceVector(*pClDevice), nullptr, false)); - } -} - TEST_F(ProgramTests, GivenForcedClVersionWhenProgramIsCreatedThenCorrectOclOptionIsPresent) { std::pair testedValues[] = { {0, "-ocl-version=120"}, diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config index 1d5a16978d..a9fcd76ed0 100644 --- a/opencl/test/unit_test/test_files/igdrcl.config +++ b/opencl/test/unit_test/test_files/igdrcl.config @@ -110,7 +110,6 @@ ForceCsrFlushing = 0 ForceCsrReprogramming = 0 OmitTimestampPacketDependencies = 0 DisableStatelessToStatefulOptimization = 0 -UseSmallerThan4gbBuffersOnly = -1 DisableConcurrentBlockExecution = 0 UseNoRingFlushesKmdMode = 1 DisableZeroCopyForUseHostPtr = 0 diff --git a/opencl/test/unit_test/test_files/kernel_num_args.cl b/opencl/test/unit_test/test_files/kernel_num_args.cl index ca2fe02067..4b2236b833 100644 --- a/opencl/test/unit_test/test_files/kernel_num_args.cl +++ b/opencl/test/unit_test/test_files/kernel_num_args.cl @@ -6,5 +6,4 @@ */ __kernel void test(__global float *argGlobal, __read_only image3d_t argImg3D, __constant float *argConst) { - argGlobal[0] = argConst[0]; } diff --git a/opencl/test/unit_test/test_files/test_constant_memory.cl b/opencl/test/unit_test/test_files/test_constant_memory.cl index 9436ad21a4..3e98f6fa92 100644 --- a/opencl/test/unit_test/test_files/test_constant_memory.cl +++ b/opencl/test/unit_test/test_files/test_constant_memory.cl @@ -10,6 +10,6 @@ __constant uint constant_a[2] = {0xabcd5432u, 0xaabb5533u}; __kernel void test(__global uint *in, __global uint *out) { int i = get_global_id(0); int j = get_global_id(0) % (sizeof(constant_a) / sizeof(constant_a[0])); - in[0] = 0; + out[i] = constant_a[j]; } diff --git a/shared/source/compiler_interface/compiler_options/compiler_options_base.h b/shared/source/compiler_interface/compiler_options/compiler_options_base.h index 5759c239bf..a92bd9a99d 100644 --- a/shared/source/compiler_interface/compiler_options/compiler_options_base.h +++ b/shared/source/compiler_interface/compiler_options/compiler_options_base.h @@ -13,7 +13,6 @@ namespace NEO { namespace CompilerOptions { static constexpr ConstStringRef greaterThan4gbBuffersRequired = "-cl-intel-greater-than-4GB-buffer-required"; -static constexpr ConstStringRef smallerThan4gbBuffersOnly = "-cl-opt-smaller-than-4GB-buffers-only"; static constexpr ConstStringRef hasBufferOffsetArg = "-cl-intel-has-buffer-offset-arg"; static constexpr ConstStringRef kernelDebugEnable = "-cl-kernel-debug-enable"; static constexpr ConstStringRef arch32bit = "-m32"; diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index d164830fbd..9b04d37fb2 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -207,7 +207,6 @@ DECLARE_DEBUG_VARIABLE(bool, DisableStatelessToStatefulOptimization, false, "Dis DECLARE_DEBUG_VARIABLE(bool, DisableConcurrentBlockExecution, false, "disables concurrent block kernel execution") DECLARE_DEBUG_VARIABLE(bool, UseNoRingFlushesKmdMode, true, "Windows only, passes flag to KMD that informs KMD to not emit any ring buffer flushes.") DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will not share memory with CPU.") -DECLARE_DEBUG_VARIABLE(int32_t, UseSmallerThan4gbBuffersOnly, -1, " -1: default, 0: disabled, 1: enabled. When enabled driver will not force stateless accesses when shared system USM is active") DECLARE_DEBUG_VARIABLE(int32_t, EnableHostPtrTracking, -1, "Enable host ptr tracking: -1 - default platform setting, 0 - disabled, 1 - enabled") DECLARE_DEBUG_VARIABLE(int32_t, MaxHwThreadsPercent, 0, "If not zero then maximum number of used HW threads is capped to max * MaxHwThreadsPercent / 100") DECLARE_DEBUG_VARIABLE(int32_t, MinHwThreadsUnoccupied, 0, "If not zero then maximum number of used HW threads is reduced by MinHwThreadsUnoccupied") diff --git a/shared/source/helpers/compiler_hw_info_config_base.inl b/shared/source/helpers/compiler_hw_info_config_base.inl index d61241f95d..1f93f7ac5f 100644 --- a/shared/source/helpers/compiler_hw_info_config_base.inl +++ b/shared/source/helpers/compiler_hw_info_config_base.inl @@ -10,6 +10,10 @@ #include "shared/source/helpers/compiler_hw_info_config.h" namespace NEO { +template +bool CompilerHwInfoConfigHw::isForceToStatelessRequired() const { + return false; +} template bool CompilerHwInfoConfigHw::isForceEmuInt32DivRemSPRequired() const { diff --git a/shared/source/helpers/compiler_hw_info_config_bdw_and_later.inl b/shared/source/helpers/compiler_hw_info_config_bdw_and_later.inl index 4c6c730086..ea6c3238cb 100644 --- a/shared/source/helpers/compiler_hw_info_config_bdw_and_later.inl +++ b/shared/source/helpers/compiler_hw_info_config_bdw_and_later.inl @@ -16,9 +16,4 @@ bool CompilerHwInfoConfigHw::isMidThreadPreemptionSupported(const Ha return hwInfo.featureTable.ftrGpGpuMidThreadLevelPreempt; } -template -bool CompilerHwInfoConfigHw::isForceToStatelessRequired() const { - return false; -} - } // namespace NEO